diff --git a/autotests/unit/engine/phraseanditeratortest.cpp b/autotests/unit/engine/phraseanditeratortest.cpp index b1df45be..f571f1f5 100644 --- a/autotests/unit/engine/phraseanditeratortest.cpp +++ b/autotests/unit/engine/phraseanditeratortest.cpp @@ -1,114 +1,114 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "phraseanditerator.h" #include "vectorpositioninfoiterator.h" #include "positioninfo.h" #include using namespace Baloo; class PhraseAndIteratorTest : public QObject { Q_OBJECT private Q_SLOTS: void test(); void testNullIterators(); }; void PhraseAndIteratorTest::test() { // Term 1 PositionInfo pi1; pi1.docId = 2; pi1.positions = {5, 9}; PositionInfo pi2; pi2.docId = 4; pi2.positions = {4, 2}; QVector vec1; vec1 << pi1 << pi2; // Term 2 PositionInfo pi3; pi3.docId = 2; pi3.positions = {6, 7}; PositionInfo pi4; pi4.docId = 4; pi4.positions = {6, 2}; PositionInfo pi5; pi5.docId = 7; pi5.positions = {1, 4, 2}; QVector vec2; vec2 << pi3 << pi4 << pi5; VectorPositionInfoIterator* it1 = new VectorPositionInfoIterator(vec1); VectorPositionInfoIterator* it2 = new VectorPositionInfoIterator(vec2); - QVector vec = {it1, it2}; + QVector vec = {it1, it2}; PhraseAndIterator it(vec); QCOMPARE(it.docId(), 
static_cast(0)); // The Query is "term1 term2". term1 must appear one position before term2 QVector result = {2}; for (quint64 val : result) { QCOMPARE(it.next(), static_cast(val)); QCOMPARE(it.docId(), static_cast(val)); } QCOMPARE(it.next(), static_cast(0)); QCOMPARE(it.docId(), static_cast(0)); } void PhraseAndIteratorTest::testNullIterators() { // Term 1 PositionInfo pi1; pi1.docId = 2; pi1.positions = {5, 9}; QVector vec1; vec1 << pi1; // Term 2 PositionInfo pi2; pi2.docId = 2; pi2.positions = {6, 7}; QVector vec2; vec2 << pi2; VectorPositionInfoIterator* it1 = new VectorPositionInfoIterator(vec1); VectorPositionInfoIterator* it2 = new VectorPositionInfoIterator(vec2); - QVector vec = {it1, nullptr, it2}; + QVector vec = {it1, nullptr, it2}; PhraseAndIterator it(vec); QCOMPARE(it.docId(), static_cast(0)); QCOMPARE(it.next(), static_cast(0)); QCOMPARE(it.docId(), static_cast(0)); } QTEST_MAIN(PhraseAndIteratorTest) #include "phraseanditeratortest.moc" diff --git a/autotests/unit/engine/positiondbtest.cpp b/autotests/unit/engine/positiondbtest.cpp index b042c16b..5e63851e 100644 --- a/autotests/unit/engine/positiondbtest.cpp +++ b/autotests/unit/engine/positiondbtest.cpp @@ -1,87 +1,87 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "positiondb.h" #include "positioninfo.h" -#include "postingiterator.h" +#include "vectorpositioninfoiterator.h" #include "singledbtest.h" using namespace Baloo; class PositionDBTest : public SingleDBTest { Q_OBJECT private Q_SLOTS: void test() { PositionDB db(PositionDB::create(m_txn), m_txn); QByteArray word("fire"); PositionInfo pos1; pos1.docId = 1; pos1.positions = QVector() << 1 << 5 << 6; PositionInfo pos2; pos2.docId = 5; pos2.positions = QVector() << 41 << 96 << 116; QVector list = {pos1, pos2}; db.put(word, list); QVector res = db.get(word); QCOMPARE(res, list); } void testIter() { PositionDB db(PositionDB::create(m_txn), m_txn); QByteArray word("fire"); PositionInfo pos1; pos1.docId = 1; pos1.positions = QVector() << 1 << 5 << 6; PositionInfo pos2; pos2.docId = 5; pos2.positions = QVector() << 41 << 96 << 116; QVector list = {pos1, pos2}; db.put(word, list); - QScopedPointer it{db.iter(word)}; + QScopedPointer it{db.iter(word)}; QCOMPARE(it->docId(), static_cast(0)); QVERIFY(it->positions().isEmpty()); QCOMPARE(it->next(), static_cast(1)); QCOMPARE(it->docId(), static_cast(1)); QCOMPARE(it->positions(), pos1.positions); QCOMPARE(it->next(), static_cast(5)); QCOMPARE(it->docId(), static_cast(5)); QCOMPARE(it->positions(), pos2.positions); QCOMPARE(it->next(), static_cast(0)); QCOMPARE(it->docId(), static_cast(0)); QVERIFY(it->positions().isEmpty()); } }; QTEST_MAIN(PositionDBTest) #include "positiondbtest.moc" diff --git a/src/engine/phraseanditerator.cpp b/src/engine/phraseanditerator.cpp index ddcb3973..85991c0f 100644 --- a/src/engine/phraseanditerator.cpp +++ b/src/engine/phraseanditerator.cpp @@ -1,121 +1,122 @@ /* This file is part of the KDE Baloo project. 
* Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "phraseanditerator.h" +#include "positioninfo.h" using namespace Baloo; -PhraseAndIterator::PhraseAndIterator(const QVector& iterators) +PhraseAndIterator::PhraseAndIterator(const QVector& iterators) : m_iterators(iterators) , m_docId(0) { if (m_iterators.contains(nullptr)) { qDeleteAll(m_iterators); m_iterators.clear(); } } PhraseAndIterator::~PhraseAndIterator() { qDeleteAll(m_iterators); } quint64 PhraseAndIterator::docId() const { return m_docId; } bool PhraseAndIterator::checkIfPositionsMatch() { QVector< QVector > positionList; positionList.reserve(m_iterators.size()); // All the iterators should have the same value for (int i = 0; i < m_iterators.size(); i++) { - PostingIterator* iter = m_iterators[i]; + auto* iter = m_iterators[i]; Q_ASSERT(iter->docId() == m_docId); QVector pi = iter->positions(); for (int j = 0; j < pi.size(); j++) { pi[j] -= i; } positionList << pi; } // Intersect all these positions QVector vec = positionList[0]; for (int l = 1; l < positionList.size(); l++) { QVector newVec = positionList[l]; int i = 0; int j = 0; QVector finalVec; while (i < vec.size() && j < newVec.size()) { if (vec[i] == newVec[j]) { finalVec << vec[i]; i++; j++; } else if (vec[i] < newVec[j]) { i++; } 
else { j++; } } vec = finalVec; } return !vec.isEmpty(); } quint64 PhraseAndIterator::next() { if (m_iterators.isEmpty()) { m_docId = 0; return 0; } if (m_iterators[0]->next() == 0) { m_docId = 0; return 0; } m_docId = m_iterators[0]->docId(); for (int i = 1; i < m_iterators.size(); i++) { PostingIterator* iter = m_iterators[i]; if (iter->docId() == 0 && iter->next() == 0) { m_docId = 0; return 0; } iter->skipTo(m_docId); if (m_docId != iter->docId()) { return next(); } } if (checkIfPositionsMatch()) return m_docId; else return next(); } diff --git a/src/engine/phraseanditerator.h b/src/engine/phraseanditerator.h index 0b8a176c..4391b1d0 100644 --- a/src/engine/phraseanditerator.h +++ b/src/engine/phraseanditerator.h @@ -1,47 +1,48 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_PHRASEANDITERATOR_H #define BALOO_PHRASEANDITERATOR_H #include "postingiterator.h" +#include "vectorpositioninfoiterator.h" #include namespace Baloo { class BALOO_ENGINE_EXPORT PhraseAndIterator : public PostingIterator { public: - explicit PhraseAndIterator(const QVector& iterators); + explicit PhraseAndIterator(const QVector& iterators); ~PhraseAndIterator(); quint64 next() override; quint64 docId() const override; private: - QVector m_iterators; + QVector m_iterators; quint64 m_docId; bool checkIfPositionsMatch(); }; } #endif // BALOO_PHRASEANDITERATOR_H diff --git a/src/engine/positiondb.cpp b/src/engine/positiondb.cpp index 6b8f2b57..fc7fae5f 100644 --- a/src/engine/positiondb.cpp +++ b/src/engine/positiondb.cpp @@ -1,206 +1,172 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "enginedebug.h" #include "positiondb.h" #include "positioncodec.h" #include "positioninfo.h" #include "postingiterator.h" +#include "vectorpositioninfoiterator.h" using namespace Baloo; PositionDB::PositionDB(MDB_dbi dbi, MDB_txn* txn) : m_txn(txn) , m_dbi(dbi) { Q_ASSERT(txn != nullptr); Q_ASSERT(dbi != 0); } PositionDB::~PositionDB() { } MDB_dbi PositionDB::create(MDB_txn* txn) { MDB_dbi dbi = 0; int rc = mdb_dbi_open(txn, "positiondb", MDB_CREATE, &dbi); if (rc) { qCWarning(ENGINE) << "PositionDB::create" << mdb_strerror(rc); return 0; } return dbi; } MDB_dbi PositionDB::open(MDB_txn* txn) { MDB_dbi dbi = 0; int rc = mdb_dbi_open(txn, "positiondb", 0, &dbi); if (rc) { qCWarning(ENGINE) << "PositionDB::open" << mdb_strerror(rc); return 0; } return dbi; } void PositionDB::put(const QByteArray& term, const QVector& list) { Q_ASSERT(!term.isEmpty()); Q_ASSERT(!list.isEmpty()); MDB_val key; key.mv_size = term.size(); key.mv_data = static_cast(const_cast(term.constData())); PositionCodec codec; QByteArray data = codec.encode(list); MDB_val val; val.mv_size = data.size(); val.mv_data = static_cast(data.data()); int rc = mdb_put(m_txn, m_dbi, &key, &val, 0); if (rc) { qCWarning(ENGINE) << "PositionDB::put" << mdb_strerror(rc); } } QVector PositionDB::get(const QByteArray& term) { Q_ASSERT(!term.isEmpty()); MDB_val key; key.mv_size = term.size(); key.mv_data = static_cast(const_cast(term.constData())); MDB_val val{0, nullptr}; int rc = mdb_get(m_txn, m_dbi, &key, &val); if (rc) { if (rc != MDB_NOTFOUND) { qCDebug(ENGINE) << "PositionDB::get" << term << mdb_strerror(rc); } return QVector(); } QByteArray data = QByteArray::fromRawData(static_cast(val.mv_data), val.mv_size); PositionCodec codec; return codec.decode(data); } void 
PositionDB::del(const QByteArray& term) { Q_ASSERT(!term.isEmpty()); MDB_val key; key.mv_size = term.size(); key.mv_data = static_cast(const_cast(term.constData())); int rc = mdb_del(m_txn, m_dbi, &key, nullptr); if (rc != 0 && rc != MDB_NOTFOUND) { qCDebug(ENGINE) << "PositionDB::del" << term << mdb_strerror(rc); } } // // Query // -class DBPositionIterator : public PostingIterator { -public: - DBPositionIterator(char* data, uint size) - : m_pos(-1) - { - PositionCodec codec; - m_vec = codec.decode(QByteArray(static_cast(data), size)); - } - - quint64 next() override { - m_pos++; - if (m_pos >= m_vec.size()) { - return 0; - } - - return m_vec[m_pos].docId; - } - - quint64 docId() const override { - if (m_pos < 0 || m_pos >= m_vec.size()) { - return 0; - } - return m_vec[m_pos].docId; - } - - QVector positions() override { - if (m_pos < 0 || m_pos >= m_vec.size()) { - return QVector(); - } - return m_vec[m_pos].positions; - } - -private: - QVector m_vec; - int m_pos; -}; - -PostingIterator* PositionDB::iter(const QByteArray& term) +VectorPositionInfoIterator* PositionDB::iter(const QByteArray& term) { Q_ASSERT(!term.isEmpty()); MDB_val key; key.mv_size = term.size(); key.mv_data = static_cast(const_cast(term.constData())); MDB_val val{0, nullptr}; int rc = mdb_get(m_txn, m_dbi, &key, &val); if (rc) { qCDebug(ENGINE) << "PositionDB::iter" << term << mdb_strerror(rc); return nullptr; } - return new DBPositionIterator(static_cast(val.mv_data), val.mv_size); + PositionCodec codec; + QByteArray ba(static_cast(val.mv_data), val.mv_size); + return new VectorPositionInfoIterator(codec.decode(ba)); } QMap> PositionDB::toTestMap() const { MDB_cursor* cursor; mdb_cursor_open(m_txn, m_dbi, &cursor); MDB_val key = {0, nullptr}; MDB_val val; QMap> map; while (1) { int rc = mdb_cursor_get(cursor, &key, &val, MDB_NEXT); if (rc) { qCDebug(ENGINE) << "PostingDB::toTestMap" << mdb_strerror(rc); break; } const QByteArray ba(static_cast(key.mv_data), key.mv_size); const QVector vinfo = 
PositionCodec().decode(QByteArray(static_cast(val.mv_data), val.mv_size)); map.insert(ba, vinfo); } mdb_cursor_close(cursor); return map; } diff --git a/src/engine/positiondb.h b/src/engine/positiondb.h index 64be2a99..49864039 100644 --- a/src/engine/positiondb.h +++ b/src/engine/positiondb.h @@ -1,60 +1,60 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_POSITIONDB_H #define BALOO_POSITIONDB_H #include "engine_export.h" #include #include #include #include namespace Baloo { class PositionInfo; -class PostingIterator; +class VectorPositionInfoIterator; class BALOO_ENGINE_EXPORT PositionDB { public: explicit PositionDB(MDB_dbi dbi, MDB_txn* txn); ~PositionDB(); static MDB_dbi create(MDB_txn* txn); static MDB_dbi open(MDB_txn* txn); void put(const QByteArray& term, const QVector& list); QVector get(const QByteArray& term); void del(const QByteArray& term); - PostingIterator* iter(const QByteArray& term); + VectorPositionInfoIterator* iter(const QByteArray& term); QMap> toTestMap() const; private: MDB_txn* m_txn; MDB_dbi m_dbi; }; } #endif // BALOO_POSITIONDB_H diff --git a/src/engine/postingiterator.cpp b/src/engine/postingiterator.cpp index 5a654dfb..b5fd2aa8 100644 --- 
a/src/engine/postingiterator.cpp +++ b/src/engine/postingiterator.cpp @@ -1,40 +1,35 @@ /* * * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "postingiterator.h" using namespace Baloo; PostingIterator::~PostingIterator() { } quint64 PostingIterator::skipTo(quint64 id) { while (docId() && docId() < id) { next(); } return docId(); } - -QVector PostingIterator::positions() -{ - return QVector(); -} diff --git a/src/engine/postingiterator.h b/src/engine/postingiterator.h index ec98c786..ad48c21b 100644 --- a/src/engine/postingiterator.h +++ b/src/engine/postingiterator.h @@ -1,48 +1,46 @@ /* * * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_POSTINGITERATOR_H #define BALOO_POSTINGITERATOR_H #include #include "engine_export.h" namespace Baloo { /** * A PostingIterator is an abstract base class which can be used to iterate * over all the "postings" or "documents" which are particular term appears. * * All PostingIterators should iterate over a list of non-decreasing document ids. */ class BALOO_ENGINE_EXPORT PostingIterator { public: virtual ~PostingIterator(); virtual quint64 next() = 0; virtual quint64 docId() const = 0; virtual quint64 skipTo(quint64 docId); - - virtual QVector positions(); }; } #endif // BALOO_POSTINGITERATOR_H diff --git a/src/engine/transaction.cpp b/src/engine/transaction.cpp index 124b219b..4e9fd766 100644 --- a/src/engine/transaction.cpp +++ b/src/engine/transaction.cpp @@ -1,645 +1,646 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "transaction.h" #include "postingdb.h" #include "documentdb.h" #include "documenturldb.h" #include "documentiddb.h" #include "positiondb.h" #include "documentdatadb.h" #include "mtimedb.h" #include "document.h" #include "enginequery.h" #include "andpostingiterator.h" #include "orpostingiterator.h" #include "phraseanditerator.h" #include "writetransaction.h" #include "idutils.h" #include "database.h" #include "databasesize.h" #include "enginedebug.h" #include #include using namespace Baloo; Transaction::Transaction(const Database& db, Transaction::TransactionType type) : m_dbis(db.m_dbis) , m_env(db.m_env) , m_writeTrans(nullptr) { uint flags = type == ReadOnly ? MDB_RDONLY : 0; int rc = mdb_txn_begin(db.m_env, nullptr, flags, &m_txn); if (rc) { qCDebug(ENGINE) << "Transaction" << mdb_strerror(rc); return; } if (type == ReadWrite) { m_writeTrans = new WriteTransaction(m_dbis, m_txn); } } Transaction::Transaction(Database* db, Transaction::TransactionType type) : Transaction(*db, type) { } Transaction::~Transaction() { if (m_writeTrans) { qWarning(ENGINE) << "Closing an active WriteTransaction without calling abort/commit"; } if (m_txn) { abort(); } } bool Transaction::hasDocument(quint64 id) const { Q_ASSERT(id > 0); IdFilenameDB idFilenameDb(m_dbis.idFilenameDbi, m_txn); return idFilenameDb.contains(id); } bool Transaction::inPhaseOne(quint64 id) const { Q_ASSERT(id > 0); DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn); return contentIndexingDb.contains(id); } bool Transaction::hasFailed(quint64 id) const { Q_ASSERT(id > 0); DocumentIdDB failedIdDb(m_dbis.failedIdDbi, m_txn); return failedIdDb.contains(id); } QVector Transaction::failedIds(quint64 limit) const { DocumentIdDB failedIdDb(m_dbis.failedIdDbi, m_txn); return 
failedIdDb.fetchItems(limit); } QByteArray Transaction::documentUrl(quint64 id) const { Q_ASSERT(m_txn); Q_ASSERT(id > 0); DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); return docUrlDb.get(id); } quint64 Transaction::documentId(const QByteArray& path) const { Q_ASSERT(m_txn); Q_ASSERT(!path.isEmpty()); DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); QList li = path.split('/'); quint64 parentId = 0; for (const QByteArray& fileName : li) { if (fileName.isEmpty()) { continue; } parentId = docUrlDb.getId(parentId, fileName); if (!parentId) { return 0; } } return parentId; } QVector Transaction::childrenDocumentId(quint64 parentId) const { DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); return docUrlDB.getChildren(parentId); } DocumentTimeDB::TimeInfo Transaction::documentTimeInfo(quint64 id) const { Q_ASSERT(m_txn); DocumentTimeDB docTimeDb(m_dbis.docTimeDbi, m_txn); return docTimeDb.get(id); } QByteArray Transaction::documentData(quint64 id) const { Q_ASSERT(m_txn); Q_ASSERT(id > 0); DocumentDataDB docDataDb(m_dbis.docDataDbi, m_txn); return docDataDb.get(id); } bool Transaction::hasChanges() const { Q_ASSERT(m_txn); if (!m_writeTrans) { qCWarning(ENGINE) << "m_writeTrans is null"; return false; } return m_writeTrans->hasChanges(); } QVector Transaction::fetchPhaseOneIds(int size) const { Q_ASSERT(m_txn); Q_ASSERT(size > 0); DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn); return contentIndexingDb.fetchItems(size); } QVector Transaction::fetchTermsStartingWith(const QByteArray& term) const { Q_ASSERT(term.size() > 0); PostingDB postingDb(m_dbis.postingDbi, m_txn); return postingDb.fetchTermsStartingWith(term); } uint Transaction::phaseOneSize() const { Q_ASSERT(m_txn); DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn); return contentIndexingDb.size(); } uint Transaction::size() const { Q_ASSERT(m_txn); DocumentDB docTermsDb(m_dbis.docTermsDbi, m_txn); return 
docTermsDb.size(); } // // Write Operations // void Transaction::setPhaseOne(quint64 id) { Q_ASSERT(m_txn); Q_ASSERT(id > 0); Q_ASSERT(m_writeTrans); DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn); contentIndexingDb.put(id); } void Transaction::removePhaseOne(quint64 id) { Q_ASSERT(m_txn); Q_ASSERT(id > 0); Q_ASSERT(m_writeTrans); DocumentIdDB contentIndexingDb(m_dbis.contentIndexingDbi, m_txn); contentIndexingDb.del(id); } void Transaction::addFailed(quint64 id) { Q_ASSERT(m_txn); Q_ASSERT(id > 0); Q_ASSERT(m_writeTrans); DocumentIdDB failedIdDb(m_dbis.failedIdDbi, m_txn); failedIdDb.put(id); } void Transaction::addDocument(const Document& doc) { Q_ASSERT(m_txn); Q_ASSERT(doc.id() > 0); if (!m_writeTrans) { qCWarning(ENGINE) << "m_writeTrans is null"; return; } m_writeTrans->addDocument(doc); } void Transaction::removeDocument(quint64 id) { Q_ASSERT(m_txn); Q_ASSERT(id > 0); if (!m_writeTrans) { qCWarning(ENGINE) << "m_writeTrans is null"; return; } m_writeTrans->removeDocument(id); } void Transaction::removeRecursively(quint64 id) { Q_ASSERT(m_txn); Q_ASSERT(id > 0); if (!m_writeTrans) { qCWarning(ENGINE) << "m_writeTrans is null"; return; } m_writeTrans->removeRecursively(id); } void Transaction::replaceDocument(const Document& doc, DocumentOperations operations) { Q_ASSERT(m_txn); Q_ASSERT(doc.id() > 0); Q_ASSERT(m_writeTrans); if (!hasDocument(doc.id())) { qCDebug(ENGINE) << "Transaction::replaceDocument" << "Document does not exist"; } if (!m_writeTrans) { qCWarning(ENGINE) << "m_writeTrans is null"; return; } m_writeTrans->replaceDocument(doc, operations); } void Transaction::commit() { Q_ASSERT(m_txn); if (!m_writeTrans) { qCWarning(ENGINE) << "m_writeTrans is null"; return; } m_writeTrans->commit(); delete m_writeTrans; m_writeTrans = nullptr; int rc = mdb_txn_commit(m_txn); if (rc) { qCWarning(ENGINE) << "Transaction::commit" << mdb_strerror(rc); } m_txn = nullptr; } void Transaction::abort() { Q_ASSERT(m_txn); mdb_txn_abort(m_txn); 
m_txn = nullptr; delete m_writeTrans; m_writeTrans = nullptr; } // // Queries // PostingIterator* Transaction::postingIterator(const EngineQuery& query) const { PostingDB postingDb(m_dbis.postingDbi, m_txn); PositionDB positionDb(m_dbis.positionDBi, m_txn); if (query.leaf()) { if (query.op() == EngineQuery::Equal) { return postingDb.iter(query.term()); } else if (query.op() == EngineQuery::StartsWith) { return postingDb.prefixIter(query.term()); } else { Q_ASSERT(0); } } const auto subQueries = query.subQueries(); if (subQueries.isEmpty()) { return nullptr; } - QVector vec; - vec.reserve(subQueries.size()); - if (query.op() == EngineQuery::Phrase) { + QVector vec; + vec.reserve(subQueries.size()); for (const EngineQuery& q : subQueries) { if (!q.leaf()) { qCDebug(ENGINE) << "Transaction::toPostingIterator" << "Phrase queries must contain leaf queries"; continue; } vec << positionDb.iter(q.term()); } return new PhraseAndIterator(vec); } + QVector vec; + vec.reserve(subQueries.size()); for (const EngineQuery& q : subQueries) { auto iterator = postingIterator(q); if (iterator) { vec << iterator; } else if (query.op() == EngineQuery::And) { return nullptr; } } if (vec.empty()) { return nullptr; } else if (vec.size() == 1) { return vec.takeFirst(); } if (query.op() == EngineQuery::And) { return new AndPostingIterator(vec); } else if (query.op() == EngineQuery::Or) { return new OrPostingIterator(vec); } Q_ASSERT(0); return nullptr; } PostingIterator* Transaction::postingCompIterator(const QByteArray& prefix, qlonglong value, PostingDB::Comparator com) const { PostingDB postingDb(m_dbis.postingDbi, m_txn); return postingDb.compIter(prefix, value, com); } PostingIterator* Transaction::mTimeIter(quint32 mtime, MTimeDB::Comparator com) const { MTimeDB mTimeDb(m_dbis.mtimeDbi, m_txn); return mTimeDb.iter(mtime, com); } PostingIterator* Transaction::mTimeRangeIter(quint32 beginTime, quint32 endTime) const { MTimeDB mTimeDb(m_dbis.mtimeDbi, m_txn); return 
mTimeDb.iterRange(beginTime, endTime); } PostingIterator* Transaction::docUrlIter(quint64 id) const { DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); return docUrlDb.iter(id); } QVector Transaction::exec(const EngineQuery& query, int limit) const { Q_ASSERT(m_txn); QVector results; PostingIterator* it = postingIterator(query); if (!it) { return results; } while (it->next() && limit) { results << it->docId(); limit--; } return results; } // // Introspection // QVector Transaction::documentTerms(quint64 docId) const { Q_ASSERT(docId); DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); return documentTermsDB.get(docId); } QVector Transaction::documentFileNameTerms(quint64 docId) const { Q_ASSERT(docId); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); return documentFileNameTermsDB.get(docId); } QVector Transaction::documentXattrTerms(quint64 docId) const { Q_ASSERT(docId); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); return documentXattrTermsDB.get(docId); } // // File Size // static size_t dbiSize(MDB_txn* txn, MDB_dbi dbi) { MDB_stat stat; mdb_stat(txn, dbi, &stat); return (stat.ms_branch_pages + stat.ms_leaf_pages + stat.ms_overflow_pages) * stat.ms_psize; } DatabaseSize Transaction::dbSize() { DatabaseSize dbSize; dbSize.postingDb = dbiSize(m_txn, m_dbis.postingDbi); dbSize.positionDb = dbiSize(m_txn, m_dbis.positionDBi); dbSize.docTerms = dbiSize(m_txn, m_dbis.docTermsDbi); dbSize.docFilenameTerms = dbiSize(m_txn, m_dbis.docFilenameTermsDbi); dbSize.docXattrTerms = dbiSize(m_txn, m_dbis.docXattrTermsDbi); dbSize.idTree = dbiSize(m_txn, m_dbis.idTreeDbi); dbSize.idFilename = dbiSize(m_txn, m_dbis.idFilenameDbi); dbSize.docTime = dbiSize(m_txn, m_dbis.docTimeDbi); dbSize.docData = dbiSize(m_txn, m_dbis.docDataDbi); dbSize.contentIndexingIds = dbiSize(m_txn, m_dbis.contentIndexingDbi); dbSize.failedIds = dbiSize(m_txn, m_dbis.failedIdDbi); dbSize.mtimeDb = dbiSize(m_txn, m_dbis.mtimeDbi); 
dbSize.expectedSize = dbSize.postingDb + dbSize.positionDb + dbSize.docTerms + dbSize.docFilenameTerms + dbSize.docXattrTerms + dbSize.idTree + dbSize.idFilename + dbSize.docTime + dbSize.docData + dbSize.contentIndexingIds + dbSize.failedIds + dbSize.mtimeDb; MDB_envinfo info; mdb_env_info(m_env, &info); dbSize.actualSize = info.me_last_pgno * 4096; // TODO: separate page size return dbSize; } // // Debugging // void Transaction::checkFsTree() { DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn); PostingDB postingDb(m_dbis.postingDbi, m_txn); const auto map = postingDb.toTestMap(); QSet allIds; for (const auto& list : map) { for (quint64 id : list) { allIds << id; } } QTextStream out(stdout); out << "Total Document IDs: " << allIds.size() << endl; int count = 0; for (quint64 id: qAsConst(allIds)) { QByteArray url = docUrlDb.get(id); if (url.isEmpty()) { auto terms = documentTermsDB.get(id); auto fileNameTerms = documentFileNameTermsDB.get(id); auto xAttrTerms = documentXattrTermsDB.get(id); // Lets reverse enginer the terms QList newTerms; QMapIterator it(map); while (it.hasNext()) { it.next(); if (it.value().contains(id)) { newTerms << it.key(); } } out << "Missing filePath for " << id << endl; out << "\tPostingDB Terms: "; for (const QByteArray& term : qAsConst(newTerms)) { out << term << " "; } out << endl; out << "\tDocumentTermsDB: "; for (const QByteArray& term : terms) { out << term << " "; } out << endl; out << "\tFileNameTermsDB: "; for (const QByteArray& term : fileNameTerms) { out << term << " "; } out << endl; out << "\tXAttrTermsDB: "; for (const QByteArray& term : xAttrTerms) { out << term << " "; } out << endl; count++; } else if (!QFileInfo::exists(QString::fromUtf8(url))) { out << "FilePath " << url << " for " << id << " does not exist"<< 
endl; count++; } } out << "Invalid Entries: " << count << " (" << count * 100.0 / allIds.size() << "%)" << endl; } void Transaction::checkTermsDbinPostingDb() { DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); PostingDB postingDb(m_dbis.postingDbi, m_txn); // Iterate over each document, and fetch all terms // check if each term maps to its own id in the posting db const auto map = postingDb.toTestMap(); QSet allIds; for (const auto& list : map) { for (quint64 id : list) { allIds << id; } } QTextStream out(stdout); out << "PostingDB check .." << endl; for (quint64 id : qAsConst(allIds)) { QVector terms = documentTermsDB.get(id); terms += documentXattrTermsDB.get(id); terms += documentFileNameTermsDB.get(id); for (const QByteArray& term : qAsConst(terms)) { PostingList plist = postingDb.get(term); if (!plist.contains(id)) { out << id << " is missing term " << term << endl; } } } } void Transaction::checkPostingDbinTermsDb() { DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn); DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn); DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn); PostingDB postingDb(m_dbis.postingDbi, m_txn); QMap map = postingDb.toTestMap(); QMapIterator it(map); QTextStream out(stdout); out << "DocumentTermsDB check .." 
<< endl; while (it.hasNext()) { it.next(); const QByteArray& term = it.key(); const PostingList& list = it.value(); for (quint64 id : list) { if (documentTermsDB.get(id).contains(term)) { continue; } if (documentFileNameTermsDB.get(id).contains(term)) { continue; } if (documentXattrTermsDB.get(id).contains(term)) { continue; } out << id << " is missing " << QString::fromUtf8(term) << " from document terms db" << endl; } } } diff --git a/src/engine/vectorpositioninfoiterator.cpp b/src/engine/vectorpositioninfoiterator.cpp index 00b1c9ad..bfe84163 100644 --- a/src/engine/vectorpositioninfoiterator.cpp +++ b/src/engine/vectorpositioninfoiterator.cpp @@ -1,63 +1,63 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
*
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#include "vectorpositioninfoiterator.h"
#include "positioninfo.h"

using namespace Baloo;

// NOTE(review): template arguments look stripped in this listing (bare
// `QVector` without an element type) - confirm against the real file.

// Initialises m_vector from `vector` and sets m_pos to -1, i.e. before the
// first entry.
VectorPositionInfoIterator::VectorPositionInfoIterator(const QVector& vector)
    : m_vector(vector)
    , m_pos(-1)
{
}

// Moves to the next entry and returns its docId, or 0 when there is none.
// This function is shown as a diff hunk: lines marked '-' are the old code
// (range check first, increment afterwards), lines marked '+' are the new code
// (increment first, range check afterwards).
// When the end has been passed, m_pos is set to the vector size and the
// vector is cleared.
quint64 VectorPositionInfoIterator::next()
{
-    if (m_pos >= m_vector.size() - 1) {
+    m_pos++;
+    if (m_pos >= m_vector.size()) {
        m_pos = m_vector.size();
        m_vector.clear();
        return 0;
    }
-    m_pos++;
    return m_vector[m_pos].docId;
}

// Returns the docId of the current entry, or 0 when m_pos lies outside the
// vector (before the first next() call or after the end).
quint64 VectorPositionInfoIterator::docId() const
{
    if (m_pos < 0 || m_pos >= m_vector.size()) {
        return 0;
    }
    return m_vector[m_pos].docId;
}

// Returns the positions of the current entry, or an empty QVector when m_pos
// lies outside the vector.
QVector VectorPositionInfoIterator::positions()
{
    if (m_pos < 0 || m_pos >= m_vector.size()) {
        return QVector();
    }
    return m_vector[m_pos].positions;
}
diff --git a/src/engine/vectorpositioninfoiterator.h b/src/engine/vectorpositioninfoiterator.h
index f37dc1d7..85f2f0b2 100644
--- a/src/engine/vectorpositioninfoiterator.h
+++ b/src/engine/vectorpositioninfoiterator.h
@@ -1,44 +1,44 @@
/* This file is part of the KDE Baloo project.
 * Copyright (C) 2015 Vishesh Handa
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
*
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#ifndef BALOO_VECTORPOSITIONINFOITERATOR_H
#define BALOO_VECTORPOSITIONINFOITERATOR_H

#include "postingiterator.h"
#include "positiondb.h"

namespace Baloo {

// A PostingIterator that walks the entries of a vector held in m_vector.
// NOTE(review): template arguments look stripped in this listing (bare
// `QVector` without an element type) - confirm against the real header.
class BALOO_ENGINE_EXPORT VectorPositionInfoIterator : public PostingIterator
{
public:
    // Takes the entries to iterate over.
    explicit VectorPositionInfoIterator(const QVector& vector);

    // docId of the current entry.
    quint64 docId() const override;
    // Advances to the next entry and returns its docId.
    quint64 next() override;
    // Positions of the current entry. This declaration is shown as a diff
    // hunk: the '-' line is the old declaration carrying `override`, the '+'
    // line is the new declaration without it.
-    QVector positions() override;
+    QVector positions();

private:
    // The entries being iterated.
    QVector m_vector;
    // Index of the current entry in m_vector.
    int m_pos;
};
}

#endif // BALOO_VECTORPOSITIONINFOITERATOR_H