diff --git a/src/engine/transaction.h b/src/engine/transaction.h index 52628d13..5e15cdfc 100644 --- a/src/engine/transaction.h +++ b/src/engine/transaction.h @@ -1,140 +1,141 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_TRANSACTION_H #define BALOO_TRANSACTION_H #include "databasedbis.h" #include "mtimedb.h" #include "postingdb.h" #include "writetransaction.h" #include "documenttimedb.h" +#include #include #include namespace Baloo { class Database; class Document; class PostingIterator; class EngineQuery; class DatabaseSize; class DBState; class BALOO_ENGINE_EXPORT Transaction { public: enum TransactionType { ReadOnly, ReadWrite }; Transaction(const Database& db, TransactionType type); Transaction(Database* db, TransactionType type); ~Transaction(); // // Getters // bool hasDocument(quint64 id) const; bool inPhaseOne(quint64 id) const; bool hasFailed(quint64 id) const; QVector failedIds(quint64 limit) const; QByteArray documentUrl(quint64 id) const; /** * This method is not cheap, and does not stat the filesystem in order to convert the path * \p path into an id. 
*/ quint64 documentId(const QByteArray& path) const; QVector childrenDocumentId(quint64 parentId) const; QByteArray documentData(quint64 id) const; DocumentTimeDB::TimeInfo documentTimeInfo(quint64 id) const; QVector exec(const EngineQuery& query, int limit = -1) const; PostingIterator* postingIterator(const EngineQuery& query) const; PostingIterator* postingCompIterator(const QByteArray& prefix, qlonglong value, PostingDB::Comparator com) const; PostingIterator* mTimeIter(quint32 mtime, MTimeDB::Comparator com) const; PostingIterator* mTimeRangeIter(quint32 beginTime, quint32 endTime) const; PostingIterator* docUrlIter(quint64 id) const; QVector fetchPhaseOneIds(int size) const; uint phaseOneSize() const; uint size() const; QVector fetchTermsStartingWith(const QByteArray& term) const; // - // Introspecing document data + // Introspecting document data // QVector documentTerms(quint64 docId) const; QVector documentFileNameTerms(quint64 docId) const; QVector documentXattrTerms(quint64 docId) const; DatabaseSize dbSize(); // // Transaction handling // void commit(); void abort(); bool hasChanges() const; // // Write Methods // void addDocument(const Document& doc); void removeDocument(quint64 id); void removeRecursively(quint64 parentId); void addFailed(quint64 id); - template - void removeRecursively(quint64 id, Functor shouldDelete) { + bool removeRecursively(quint64 parentId, std::function shouldDelete) + { Q_ASSERT(m_txn); Q_ASSERT(m_writeTrans); - m_writeTrans->removeRecursively(id, shouldDelete); + return m_writeTrans->removeRecursively(parentId, shouldDelete); } void replaceDocument(const Document& doc, DocumentOperations operations); void setPhaseOne(quint64 id); void removePhaseOne(quint64 id); // Debugging void checkFsTree(); void checkTermsDbinPostingDb(); void checkPostingDbinTermsDb(); private: Transaction(const Transaction& rhs) = delete; const DatabaseDbis& m_dbis; MDB_txn *m_txn = nullptr; MDB_env *m_env = nullptr; WriteTransaction *m_writeTrans = nullptr; friend class 
DatabaseSanitizerImpl; friend class DBState; // for testing }; } #endif // BALOO_TRANSACTION_H diff --git a/src/engine/writetransaction.cpp b/src/engine/writetransaction.cpp index f0172330..e73e55ff 100644 --- a/src/engine/writetransaction.cpp +++ b/src/engine/writetransaction.cpp @@ -1,320 +1,341 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "writetransaction.h" #include "transaction.h" #include "document.h" #include "postingdb.h" #include "documentdb.h" #include "documenturldb.h" #include "documentiddb.h" #include "positiondb.h" #include "documenttimedb.h" #include "documentdatadb.h" #include "mtimedb.h" #include "idutils.h" using namespace Baloo; void WriteTransaction::addDocument(const Document& doc) { quint64 id = doc.id(); DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); 
Q_ASSERT(!documentTermsDB.contains(id)); Q_ASSERT(!documentXattrTermsDB.contains(id)); Q_ASSERT(!documentFileNameTermsDB.contains(id)); Q_ASSERT(!docTimeDB.contains(id)); Q_ASSERT(!docDataDB.contains(id)); Q_ASSERT(!contentIndexingDB.contains(id)); if (!docUrlDB.put(id, doc.url())) { return; } QVector docTerms = addTerms(id, doc.m_terms); documentTermsDB.put(id, docTerms); QVector docXattrTerms = addTerms(id, doc.m_xattrTerms); if (!docXattrTerms.isEmpty()) documentXattrTermsDB.put(id, docXattrTerms); QVector docFileNameTerms = addTerms(id, doc.m_fileNameTerms); if (!docFileNameTerms.isEmpty()) documentFileNameTermsDB.put(id, docFileNameTerms); if (doc.contentIndexing()) { contentIndexingDB.put(doc.id()); } DocumentTimeDB::TimeInfo info; info.mTime = doc.m_mTime; info.cTime = doc.m_cTime; docTimeDB.put(id, info); mtimeDB.put(doc.m_mTime, id); if (!doc.m_data.isEmpty()) { docDataDB.put(id, doc.m_data); } } QVector WriteTransaction::addTerms(quint64 id, const QMap& terms) { QVector termList; termList.reserve(terms.size()); m_pendingOperations.reserve(m_pendingOperations.size() + terms.size()); QMapIterator it(terms); while (it.hasNext()) { const QByteArray& term = it.next().key(); termList.append(term); Operation op; op.type = AddId; op.data.docId = id; op.data.positions = it.value().positions; m_pendingOperations[term].append(op); } return termList; } void WriteTransaction::removeDocument(quint64 id) { DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); DocumentIdDB failedIndexingDB(m_dbis.failedIdDbi, m_txn); MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); removeTerms(id, documentTermsDB.get(id)); 
removeTerms(id, documentXattrTermsDB.get(id)); removeTerms(id, documentFileNameTermsDB.get(id)); documentTermsDB.del(id); documentXattrTermsDB.del(id); documentFileNameTermsDB.del(id); docUrlDB.del(id, [&docTimeDB](quint64 id) { return !docTimeDB.contains(id); }); contentIndexingDB.del(id); failedIndexingDB.del(id); DocumentTimeDB::TimeInfo info = docTimeDB.get(id); docTimeDB.del(id); mtimeDB.del(info.mTime, id); docDataDB.del(id); } void WriteTransaction::removeTerms(quint64 id, const QVector& terms) { for (const QByteArray& term : terms) { Operation op; op.type = RemoveId; op.data.docId = id; m_pendingOperations[term].append(op); } } void WriteTransaction::removeRecursively(quint64 parentId) { DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); const QVector children = docUrlDB.getChildren(parentId); for (quint64 id : children) { if (id) { removeRecursively(id); } } removeDocument(parentId); } +bool WriteTransaction::removeRecursively(quint64 parentId, std::function shouldDelete) +{ + DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); + + if (parentId && !shouldDelete(parentId)) { + return false; + } + + bool isEmpty = true; + const QVector children = docUrlDB.getChildren(parentId); + for (quint64 id : children) { + isEmpty &= removeRecursively(id, shouldDelete); + } + // refetch + if (isEmpty && docUrlDB.getChildren(parentId).isEmpty()) { + removeDocument(parentId); + return true; + } + return false; +} + void WriteTransaction::replaceDocument(const Document& doc, DocumentOperations operations) { DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn); DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn); DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn); MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn); DocumentUrlDB 
docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); const quint64 id = doc.id(); if (operations & DocumentTerms) { Q_ASSERT(!doc.m_terms.isEmpty()); QVector prevTerms = documentTermsDB.get(id); QVector docTerms = replaceTerms(id, prevTerms, doc.m_terms); if (docTerms != prevTerms) { documentTermsDB.put(id, docTerms); } } if (operations & XAttrTerms) { QVector prevTerms = documentXattrTermsDB.get(id); QVector docXattrTerms = replaceTerms(id, prevTerms, doc.m_xattrTerms); if (docXattrTerms != prevTerms) { if (!docXattrTerms.isEmpty()) documentXattrTermsDB.put(id, docXattrTerms); else documentXattrTermsDB.del(id); } } if (operations & FileNameTerms) { QVector prevTerms = documentFileNameTermsDB.get(id); QVector docFileNameTerms = replaceTerms(id, prevTerms, doc.m_fileNameTerms); if (docFileNameTerms != prevTerms) { if (!docFileNameTerms.isEmpty()) documentFileNameTermsDB.put(id, docFileNameTerms); else documentFileNameTermsDB.del(id); } } if (doc.contentIndexing()) { contentIndexingDB.put(doc.id()); } if (operations & DocumentTime) { DocumentTimeDB::TimeInfo info = docTimeDB.get(id); if (info.mTime != doc.m_mTime) { mtimeDB.del(info.mTime, id); mtimeDB.put(doc.m_mTime, id); } info.mTime = doc.m_mTime; info.cTime = doc.m_cTime; docTimeDB.put(id, info); } if (operations & DocumentData) { if (!doc.m_data.isEmpty()) { docDataDB.put(id, doc.m_data); } else { docDataDB.del(id); } } if (operations & DocumentUrl) { docUrlDB.replace(id, doc.url(), [&docTimeDB](quint64 id) { return !docTimeDB.contains(id); });; } } QVector< QByteArray > WriteTransaction::replaceTerms(quint64 id, const QVector& prevTerms, const QMap& terms) { m_pendingOperations.reserve(m_pendingOperations.size() + prevTerms.size()); for (const QByteArray& term : prevTerms) { Operation op; op.type = RemoveId; op.data.docId = id; m_pendingOperations[term].append(op); } return addTerms(id, terms); } void WriteTransaction::commit() { PostingDB postingDB(m_dbis.postingDbi, m_txn); PositionDB 
positionDB(m_dbis.positionDBi, m_txn); QHashIterator > iter(m_pendingOperations); while (iter.hasNext()) { iter.next(); const QByteArray& term = iter.key(); const QVector operations = iter.value(); PostingList list = postingDB.get(term); bool fetchedPositionList = false; QVector positionList; for (const Operation& op : operations) { quint64 id = op.data.docId; if (op.type == AddId) { sortedIdInsert(list, id); if (!op.data.positions.isEmpty()) { if (!fetchedPositionList) { positionList = positionDB.get(term); fetchedPositionList = true; } sortedIdInsert(positionList, op.data); } } else { sortedIdRemove(list, id); if (!fetchedPositionList) { positionList = positionDB.get(term); fetchedPositionList = true; } sortedIdRemove(positionList, PositionInfo(id)); } } if (!list.isEmpty()) { postingDB.put(term, list); } else { postingDB.del(term); } if (fetchedPositionList) { if (!positionList.isEmpty()) { positionDB.put(term, positionList); } else { positionDB.del(term); } } } m_pendingOperations.clear(); } diff --git a/src/engine/writetransaction.h b/src/engine/writetransaction.h index 8cf53768..4e7651ac 100644 --- a/src/engine/writetransaction.h +++ b/src/engine/writetransaction.h @@ -1,110 +1,96 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_WRITETRANSACTION_H #define BALOO_WRITETRANSACTION_H #include "positioninfo.h" #include "document.h" #include "documentoperations.h" #include "databasedbis.h" #include "documenturldb.h" +#include namespace Baloo { class BALOO_ENGINE_EXPORT WriteTransaction { public: WriteTransaction(DatabaseDbis dbis, MDB_txn* txn) : m_txn(txn) , m_dbis(dbis) {} void addDocument(const Document& doc); void removeDocument(quint64 id); /** * Remove the document with id \p parentId and all its children. */ void removeRecursively(quint64 parentId); /** * Goes through every document in the database, and remove the ones for which \p shouldDelete - * returns false. It starts searching from \p parentId, which can be 0 to search + * returns true. It starts searching from \p parentId, which can be 0 to search * through everything. * * \arg shouldDelete takes a quint64 as a parameter + * \return true if the document (and all its children) has been removed * * This function should typically be called when there are no other ReadTransaction in process * as that would otherwise balloon the size of the database. 
*/ - template - void removeRecursively(quint64 parentId, Functor shouldDelete) { - DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); - - if (!shouldDelete(parentId)) { - return; - } - - const QVector children = docUrlDB.getChildren(parentId); - for (quint64 id : children) { - removeRecursively(id, shouldDelete); - } - // refetch - if (docUrlDB.getChildren(parentId).isEmpty()) { - removeDocument(parentId); - } - } + bool removeRecursively(quint64 parentId, std::function shouldDelete); void replaceDocument(const Document& doc, DocumentOperations operations); void commit(); bool hasChanges() const { return !m_pendingOperations.isEmpty(); } enum OperationType { AddId, RemoveId }; struct Operation { OperationType type; PositionInfo data; }; private: /* * Adds an 'addId' operation to the pending queue for each term. * Returns the list of all the terms. */ QVector addTerms(quint64 id, const QMap& terms); QVector replaceTerms(quint64 id, const QVector& prevTerms, const QMap& terms); void removeTerms(quint64 id, const QVector& terms); QHash > m_pendingOperations; MDB_txn* m_txn; DatabaseDbis m_dbis; }; } Q_DECLARE_TYPEINFO(Baloo::WriteTransaction::Operation, Q_MOVABLE_TYPE); #endif // BALOO_WRITETRANSACTION_H