diff --git a/autotests/integration/querytest.cpp b/autotests/integration/querytest.cpp index c3cdf04a..dd517567 100644 --- a/autotests/integration/querytest.cpp +++ b/autotests/integration/querytest.cpp @@ -1,349 +1,361 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "database.h" #include "transaction.h" #include "document.h" #include "termgenerator.h" #include "enginequery.h" #include "idutils.h" #include #include using namespace Baloo; class SortedIdVector : public QVector { public: SortedIdVector(const QVector& list) : QVector(list) { std::sort(begin(), end()); } SortedIdVector(std::initializer_list args) : SortedIdVector(QVector(args)) {} }; char *toString(const QVector &idlist) { QByteArray text("IDs["); text += QByteArray::number(idlist.size()) + "]:"; for (auto id : idlist) { text += " " + QByteArray::number(id, 16); } return qstrdup(text.data()); } class QueryTest : public QObject { Q_OBJECT private Q_SLOTS: void initTestCase() { dir.reset(new QTemporaryDir()); auto touchFile = [](const QString& path) { QFile file(path); file.open(QIODevice::WriteOnly); file.write("data"); file.close(); return filePathToId(QFile::encodeName(path)); }; m_id1 = touchFile(dir->path() + "/file1.txt"); m_id2 = touchFile(dir->path() + "/file2"); m_id3 = touchFile(dir->path() + "/file3"); m_id4 = touchFile(dir->path() + "/file4"); m_id7 = touchFile(dir->path() + "/file7_lazy"); m_id8 = touchFile(dir->path() + "/file8_dog"); m_id5 = touchFile(dir->path() + "/tagFile1"); m_id6 = touchFile(dir->path() + "/tagFile2"); } void init() { dbDir = new QTemporaryDir(); db = new Database(dbDir->path()); db->open(Database::CreateDatabase); insertDocuments(); } void cleanup() { delete db; delete dbDir; } void testTermEqual(); void testTermStartsWith(); void testTermAnd(); void testTermOr(); void testTermPhrase_data(); void testTermPhrase(); void testTagTermAnd_data(); void testTagTermAnd(); void testTagTermPhrase_data(); void testTagTermPhrase(); private: QScopedPointer dir; QTemporaryDir* dbDir; Database* db; void insertDocuments(); void addDocument(Transaction* tr,const QString& text, quint64 id, const QString& url) { Document doc; doc.setUrl(QFile::encodeName(url)); QString fileName = url.mid(url.lastIndexOf('/') + 1); TermGenerator tg(doc); tg.indexText(text); tg.indexFileNameText(fileName); - tg.indexFileNameText(fileName, QByteArrayLiteral("F")); doc.setId(id); doc.setMTime(1); doc.setCTime(2); tr->addDocument(doc); } void renameDocument(Transaction* tr, quint64 id, const QString& newName) { Document doc; TermGenerator tg(doc); tg.indexFileNameText(newName); - tg.indexFileNameText(newName, QByteArrayLiteral("F")); doc.setId(id); tr->replaceDocument(doc, FileNameTerms); } void insertTagDocuments(); void addTagDocument(Transaction* tr,const QStringList& tags, quint64 id, const QString& url) { Document doc; doc.setUrl(QFile::encodeName(url)); QString fileName = url.mid(url.lastIndexOf('/') + 1); TermGenerator tg(doc); tg.indexText("text/plain", QByteArray("M")); for (const QString& tag : tags) { tg.indexXattrText(tag, QByteArray("TA")); } tg.indexFileNameText(fileName); doc.setId(id); doc.setMTime(3); doc.setCTime(4); tr->addDocument(doc); } quint64 m_id1; quint64 m_id2; quint64 m_id3; quint64 m_id4; quint64 m_id5; quint64 m_id6; quint64 m_id7; quint64 m_id8; }; void QueryTest::insertDocuments() { Transaction tr(db, Transaction::ReadWrite); addDocument(&tr, QStringLiteral("The quick brown fox jumped over the crazy dog"), m_id1, dir->path() + "/file1.txt"); addDocument(&tr, QStringLiteral("The quick brown fox jumped over the lazy dog"), m_id7, dir->path() + "/file7_lazy"); addDocument(&tr, QStringLiteral("A quick brown fox ran around a easy dog"), m_id8, dir->path() + "/file8_dog"); addDocument(&tr, QStringLiteral("The night is dark and full of terror"), m_id2, dir->path() + "/file2"); addDocument(&tr, QStringLiteral("Don't feel sorry for yourself. Only assholes do that"), m_id3, dir->path() + "/file3"); addDocument(&tr, QStringLiteral("Only the dead stay 17 forever. crazy"), m_id4, dir->path() + "/file4"); renameDocument(&tr, m_id8, QStringLiteral("file8_easy")); tr.commit(); } void QueryTest::insertTagDocuments() { Transaction tr(db, Transaction::ReadWrite); addTagDocument(&tr, {"One", "Two", "Three", "Four", "F1"}, m_id5, dir->path() + "/tagFile1"); addTagDocument(&tr, {"One", "Two-Three", "Four", "F2"}, m_id6, dir->path() + "/tagFile2"); tr.commit(); } void QueryTest::testTermEqual() { EngineQuery q("the"); QVector result = SortedIdVector{m_id1, m_id2, m_id4, m_id7}; Transaction tr(db, Transaction::ReadOnly); QCOMPARE(tr.exec(q), result); } void QueryTest::testTermStartsWith() { EngineQuery q("for", EngineQuery::StartsWith); QVector result = SortedIdVector{m_id3, m_id4}; Transaction tr(db, Transaction::ReadOnly); QCOMPARE(tr.exec(q), result); } void QueryTest::testTermAnd() { QVector queries; queries << EngineQuery("for"); queries << EngineQuery("sorry"); EngineQuery q(queries, EngineQuery::And); QVector result = {m_id3}; Transaction tr(db, Transaction::ReadOnly); QCOMPARE(tr.exec(q), result); } void QueryTest::testTermOr() { QVector queries; queries << EngineQuery("over"); queries << EngineQuery("terror"); EngineQuery q(queries, EngineQuery::Or); QVector result = SortedIdVector{m_id1, m_id2, m_id7}; Transaction tr(db, Transaction::ReadOnly); QCOMPARE(tr.exec(q), result); } void QueryTest::testTermPhrase_data() { QTest::addColumn("phrase"); QTest::addColumn>("contentMatches"); QTest::addColumn>("filenameMatches"); QTest::addColumn("failReason"); auto addRow = [](const char* name, const QByteArrayList& phrase, const QVector contentMatches, const QVector filenameMatches, const QString& failureReason) { QTest::addRow("%s", name) << phrase << contentMatches << filenameMatches << failureReason; }; + // Content matches addRow("Crazy dog", {"crazy", "dog"}, SortedIdVector{ m_id1 }, {}, ""); addRow("Lazy dog", {"lazy", "dog"}, SortedIdVector{ m_id7 }, {}, ""); addRow("Brown fox", {"brown", "fox"}, SortedIdVector{ m_id1, m_id7, m_id8 }, {}, ""); - addRow("Crazy dog file 1", {"file1"}, SortedIdVector{ m_id1 }, SortedIdVector{ m_id1 }, ""); - addRow("Crazy dog file 2", {"file1", "txt"}, SortedIdVector{ m_id1 }, SortedIdVector{ m_id1 }, ""); - addRow("Lazy dog file 1", {"file7"}, SortedIdVector{ m_id7 }, SortedIdVector{ m_id7 }, ""); - addRow("Lazy dog file 2", {"file7", "lazy"}, SortedIdVector{ m_id7 }, SortedIdVector{ m_id7 }, "Content shadows filename"); - addRow("Lazy dog file 3", {"dog"}, SortedIdVector{ m_id1, m_id7, m_id8 }, SortedIdVector{ m_id8 }, "Filename shadows content"); + addRow("Dog", {"dog"}, SortedIdVector{ m_id1, m_id7, m_id8 }, {}, ""); + // Filename matches + addRow("Crazy dog file 1", {"file1"}, {}, SortedIdVector{ m_id1 }, ""); + addRow("Crazy dog file 2", {"file1", "txt"}, {}, SortedIdVector{ m_id1 }, ""); + addRow("Lazy dog file 1", {"file7"}, {}, SortedIdVector{ m_id7 }, ""); + addRow("Lazy dog file 2", {"file7", "lazy"}, {}, SortedIdVector{ m_id7 }, ""); + // Matches content and filename + addRow("Lazy both", {"lazy"}, { m_id7 }, { m_id7 }, ""); + addRow("Easy both", {"easy"}, { m_id8 }, { m_id8 }, ""); } void QueryTest::testTermPhrase() { QFETCH(QByteArrayList, phrase); QFETCH(QVector, contentMatches); QFETCH(QVector, filenameMatches); QFETCH(QString, failReason); QVector queries; for (const QByteArray& term : phrase) { queries << EngineQuery(term); } EngineQuery q(queries, EngineQuery::Phrase); Transaction tr(db, Transaction::ReadOnly); if (!failReason.isEmpty()) { QEXPECT_FAIL("", qPrintable(failReason), Continue); } QCOMPARE(tr.exec(q), contentMatches); + + queries.clear(); + const QByteArray fPrefix = QByteArrayLiteral("F"); + for (QByteArray term : phrase) { + term = fPrefix + term; + queries << EngineQuery(term); + } + EngineQuery qf(queries, EngineQuery::Phrase); + QCOMPARE(tr.exec(qf), filenameMatches); } void QueryTest::testTagTermAnd_data() { QTest::addColumn("terms"); QTest::addColumn>("matchIds"); QTest::addRow("Simple match") << QByteArrayList({"one", "four"}) << QVector { m_id5, m_id6 }; QTest::addRow("Only one") << QByteArrayList({"one", "f1"}) << QVector { m_id5 }; QTest::addRow("Also from phrase") << QByteArrayList({"two", "three"}) << QVector { m_id5, m_id6 }; } void QueryTest::testTagTermAnd() { insertTagDocuments(); QFETCH(QByteArrayList, terms); QFETCH(QVector, matchIds); QByteArray prefix{"TA"}; QVector queries; for (const QByteArray& term : terms) { queries << EngineQuery(prefix + term); } EngineQuery q(queries, EngineQuery::And); Transaction tr(db, Transaction::ReadOnly); QCOMPARE(tr.exec(q), matchIds); } void QueryTest::testTagTermPhrase_data() { QTest::addColumn("terms"); QTest::addColumn>("matchIds"); QTest::addRow("Simple match") << QByteArrayList({"one"}) << QVector { m_id5, m_id6 }; QTest::addRow("Apart") << QByteArrayList({"two", "four"}) << QVector { }; QTest::addRow("Adjacent") << QByteArrayList({"three", "four"}) << QVector { }; QTest::addRow("Only phrase") << QByteArrayList({"two", "three"}) << QVector { m_id6 }; } void QueryTest::testTagTermPhrase() { insertTagDocuments(); QFETCH(QByteArrayList, terms); QFETCH(QVector, matchIds); QByteArray prefix{"TA"}; QVector queries; for (const QByteArray& term : terms) { queries << EngineQuery(prefix + term); } EngineQuery q(queries, EngineQuery::Phrase); Transaction tr(db, Transaction::ReadOnly); auto res = tr.exec(q); QCOMPARE(res, matchIds); } QTEST_MAIN(QueryTest) #include "querytest.moc" diff --git a/src/engine/termgenerator.cpp b/src/engine/termgenerator.cpp index a935d33f..f8c20bb0 100644 --- a/src/engine/termgenerator.cpp +++ b/src/engine/termgenerator.cpp @@ -1,149 +1,145 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2014-2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "termgenerator.h" #include "document.h" #include #include using namespace Baloo; TermGenerator::TermGenerator(Document& doc) : m_doc(doc) , m_position(1) { } void TermGenerator::indexText(const QString& text) { indexText(text, QByteArray()); } QByteArrayList TermGenerator::termList(const QString& text_) { QString text(text_); text.replace(QLatin1Char('_'), QLatin1Char(' ')); int start = 0; int end = 0; QByteArrayList list; QTextBoundaryFinder bf(QTextBoundaryFinder::Word, text); for (; bf.position() != -1; bf.toNextBoundary()) { if (bf.boundaryReasons() & QTextBoundaryFinder::StartOfItem) { start = bf.position(); continue; } else if (bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) { end = bf.position(); QString str = text.mid(start, end - start); // Remove all accents. It is important to call toLower after normalization, // since some exotic unicode symbols can remain uppercase const QString denormalized = str.normalized(QString::NormalizationForm_KD).toLower(); QString cleanString; cleanString.reserve(denormalized.size()); for (const QChar& ch : denormalized) { auto cat = ch.category(); if (cat != QChar::Mark_NonSpacing && cat != QChar::Mark_SpacingCombining && cat != QChar::Mark_Enclosing) { cleanString.append(ch); } } str = cleanString.normalized(QString::NormalizationForm_KC); if (!str.isEmpty()) { // Truncate the string to avoid arbitrarily long terms list << str.leftRef(maxTermSize).toUtf8(); } } } return list; } void TermGenerator::indexText(const QString& text, const QByteArray& prefix) { const QByteArrayList terms = termList(text); if (terms.size() == 1) { QByteArray finalArr = prefix + terms[0]; m_doc.addTerm(finalArr); return; } for (const QByteArray& term : terms) { QByteArray finalArr = prefix + term; m_doc.addPositionTerm(finalArr, m_position); m_position++; } m_position++; } -void TermGenerator::indexFileNameText(const QString& text, const QByteArray& prefix) +void TermGenerator::indexFileNameText(const QString& text) { + const QByteArray prefix = QByteArrayLiteral("F"); const QByteArrayList terms = termList(text); if (terms.size() == 1) { QByteArray finalArr = prefix + terms[0]; m_doc.addFileNameTerm(finalArr); return; } for (const QByteArray& term : terms) { QByteArray finalArr = prefix + term; m_doc.addFileNamePositionTerm(finalArr, m_position); m_position++; } m_position++; } -void TermGenerator::indexFileNameText(const QString& text) -{ - indexFileNameText(text, QByteArray()); -} - void TermGenerator::indexXattrText(const QString& text, const QByteArray& prefix) { const QByteArrayList terms = termList(text); if (terms.size() == 1) { QByteArray finalArr = prefix + terms[0]; m_doc.addXattrTerm(finalArr); return; } for (const QByteArray& term : terms) { QByteArray finalArr = prefix + term; m_doc.addXattrPositionTerm(finalArr, m_position); m_position++; } m_position++; } int TermGenerator::position() const { return m_position; } void TermGenerator::setPosition(int position) { m_position = position; } diff --git a/src/engine/termgenerator.h b/src/engine/termgenerator.h index ea7a1978..1860ab9e 100644 --- a/src/engine/termgenerator.h +++ b/src/engine/termgenerator.h @@ -1,60 +1,59 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2014-2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_TERMGENERATOR_H #define BALOO_TERMGENERATOR_H #include #include #include "engine_export.h" #include "document.h" namespace Baloo { class BALOO_ENGINE_EXPORT TermGenerator { public: explicit TermGenerator(Document& doc); void setDocument(Document& doc) { m_doc = doc; } void indexText(const QString& text); void indexText(const QString& text, const QByteArray& prefix); void indexXattrText(const QString& text, const QByteArray& prefix); void indexFileNameText(const QString& text); - void indexFileNameText(const QString& text, const QByteArray& prefix); void setPosition(int position); int position() const; static QByteArrayList termList(const QString& text); // Trim all terms to this size const static int maxTermSize = 25; private: Document& m_doc; int m_position; }; } #endif // BALOO_TERMGENERATOR_H diff --git a/src/file/basicindexingjob.cpp b/src/file/basicindexingjob.cpp index e6bf5e78..2abb68f8 100644 --- a/src/file/basicindexingjob.cpp +++ b/src/file/basicindexingjob.cpp @@ -1,223 +1,222 @@ /* * This file is part of the KDE Baloo Project * Copyright (C) 2013-2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) version 3, or any * later version accepted by the membership of KDE e.V. (or its * successor approved by the membership of KDE e.V.), which shall * act as a proxy defined in Section 6 of version 3 of the license. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #include "basicindexingjob.h" #include "termgenerator.h" #include "idutils.h" #include #include #include #include using namespace Baloo; BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype, IndexingLevel level) : m_filePath(filePath) , m_mimetype(mimetype) , m_indexingLevel(level) { } namespace { void indexXAttr(const QString& url, Document& doc) { KFileMetaData::UserMetaData userMetaData(url); TermGenerator tg(doc); const QStringList tags = userMetaData.tags(); for (const QString& tag : tags) { tg.indexXattrText(tag, QByteArray("TA")); doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8()); } int rating = userMetaData.rating(); if (rating) { doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating)); } QString comment = userMetaData.userComment(); if (!comment.isEmpty()) { tg.indexXattrText(comment, QByteArray("C")); } } QVector typesForMimeType(const QString& mimeType) { using namespace KFileMetaData; QVector types; types.reserve(2); // Basic types if (mimeType.startsWith(QLatin1String("audio/"))) types << Type::Audio; if (mimeType.startsWith(QLatin1String("video/"))) types << Type::Video; if (mimeType.startsWith(QLatin1String("image/"))) types << Type::Image; if (mimeType.startsWith(QLatin1String("text/"))) types << Type::Text; if (mimeType.contains(QLatin1String("document"))) types << Type::Document; if (mimeType.contains(QLatin1String("powerpoint"))) { types << Type::Presentation; types << Type::Document; } if (mimeType.contains(QLatin1String("excel"))) { types << Type::Spreadsheet; types << Type::Document; } static QMultiHash typeMapper = { {"text/plain", Type::Document}, // MS Office {"application/msword", Type::Document}, {"application/x-scribus", Type::Document}, // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above: // - application/vnd.ms-powerpoint // - application/vnd.ms-excel // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document // - application/vnd.openxmlformats-officedocument.wordprocessingml.document // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet // - application/vnd.openxmlformats-officedocument.presentationml.presentation // - application/vnd.oasis.opendocument.text // - application/vnd.oasis.opendocument.spreadsheet // - application/vnd.oasis.opendocument.presentation // Office 2007 {"application/vnd.openxmlformats-officedocument.presentationml.presentation", Type::Presentation}, {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", Type::Spreadsheet}, // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification {"application/vnd.oasis.opendocument.presentation", Type::Presentation}, {"application/vnd.oasis.opendocument.spreadsheet", Type::Spreadsheet}, {"application/pdf", Type::Document}, {"application/postscript", Type::Document}, {"application/x-dvi", Type::Document}, {"application/rtf", Type::Document}, // EBooks {"application/epub+zip", Type::Document}, {"application/x-mobipocket-ebook", Type::Document}, // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats {"application/x-tar", Type::Archive}, {"application/x-bzip2", Type::Archive}, {"application/x-gzip", Type::Archive}, {"application/x-lzip", Type::Archive}, {"application/x-lzma", Type::Archive}, {"application/x-lzop", Type::Archive}, {"application/x-compress", Type::Archive}, {"application/x-7z-compressed", Type::Archive}, {"application/x-ace-compressed", Type::Archive}, {"application/x-astrotite-afa", Type::Archive}, {"application/x-alz-compressed", Type::Archive}, {"application/vnd.android.package-archive", Type::Archive}, {"application/x-arj", Type::Archive}, {"application/vnd.ms-cab-compressed", Type::Archive}, {"application/x-cfs-compressed", Type::Archive}, {"application/x-dar", Type::Archive}, {"application/x-lzh", Type::Archive}, {"application/x-lzx", Type::Archive}, {"application/x-rar-compressed", Type::Archive}, {"application/x-stuffit", Type::Archive}, {"application/x-stuffitx", Type::Archive}, {"application/x-gtar", Type::Archive}, {"application/zip", Type::Archive}, // WPS office {"application/wps-office.doc", Type::Document}, {"application/wps-office.xls", Type::Document}, {"application/wps-office.xls", Type::Spreadsheet}, {"application/wps-office.pot", Type::Document}, {"application/wps-office.pot", Type::Presentation}, {"application/wps-office.wps", Type::Document}, {"application/wps-office.docx", Type::Document}, {"application/wps-office.xlsx", Type::Document}, {"application/wps-office.xlsx", Type::Spreadsheet}, {"application/wps-office.pptx", Type::Document}, {"application/wps-office.pptx", Type::Presentation}, // Other {"text/markdown", Type::Document}, {"image/vnd.djvu+multipage", Type::Document}, {"application/x-lyx", Type::Document} }; auto hashIt = typeMapper.find(mimeType); while (hashIt != typeMapper.end() && hashIt.key() == mimeType) { types.append(hashIt.value()); ++hashIt; } return types; } } // namespace BasicIndexingJob::~BasicIndexingJob() { } bool BasicIndexingJob::index() { const QByteArray url = QFile::encodeName(m_filePath); QT_STATBUF statBuf; if (filePathToStat(url, statBuf) != 0) { return false; } Document doc; doc.setId(statBufToId(statBuf)); doc.setUrl(url); QString fileName = url.mid(url.lastIndexOf('/') + 1); TermGenerator tg(doc); tg.indexFileNameText(fileName); - tg.indexFileNameText(fileName, QByteArray("F")); tg.indexText(m_mimetype, QByteArray("M")); // (Content) Modification time, Metadata (e.g. XAttr) change time doc.setMTime(statBuf.st_mtime); doc.setCTime(statBuf.st_ctime); if (S_ISDIR(statBuf.st_mode)) { static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast(KFileMetaData::Type::Folder)); doc.addTerm(type); // For folders we do not need to go through file indexing, so we do not set contentIndexing } else { if (m_indexingLevel == MarkForContentIndexing) { doc.setContentIndexing(true); } // Types const QVector tList = typesForMimeType(m_mimetype); for (KFileMetaData::Type::Type type : tList) { QByteArray num = QByteArray::number(static_cast(type)); doc.addTerm(QByteArray("T") + num); } } indexXAttr(m_filePath, doc); m_doc = doc; return true; } diff --git a/src/lib/searchstore.cpp b/src/lib/searchstore.cpp index dba0a13e..f231974e 100644 --- a/src/lib/searchstore.cpp +++ b/src/lib/searchstore.cpp @@ -1,421 +1,425 @@ /* * This file is part of the KDE Baloo Project * Copyright (C) 2013-2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) version 3, or any * later version accepted by the membership of KDE e.V. (or its * successor approved by the membership of KDE e.V.), which shall * act as a proxy defined in Section 6 of version 3 of the license. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #include "baloodebug.h" #include "searchstore.h" #include "term.h" #include "global.h" #include "baloodebug.h" #include "database.h" #include "transaction.h" #include "enginequery.h" #include "queryparser.h" #include "termgenerator.h" #include "andpostingiterator.h" #include "orpostingiterator.h" #include "idutils.h" #include #include #include #include #include #include #include #include namespace Baloo { namespace { QPair calculateTimeRange(const QDateTime& dt, Term::Comparator com) { Q_ASSERT(dt.isValid()); quint32 timet = dt.toSecsSinceEpoch(); if (com == Term::LessEqual) { return {0, timet}; } if (com == Term::Less) { return {0, timet - 1}; } if (com == Term::GreaterEqual) { return {timet, std::numeric_limits::max()}; } if (com == Term::Greater) { return {timet + 1, std::numeric_limits::max()}; } if (com == Term::Equal) { timet = QDateTime(dt.date()).toSecsSinceEpoch(); return {timet, timet + 24 * 60 * 60 - 1}; } Q_ASSERT_X(0, __func__, "mtime query must contain a valid comparator"); return {0, 0}; } struct InternalProperty { const char* propertyName; const char* prefix; QVariant::Type valueType; }; constexpr std::array internalProperties {{ { "content", "", QVariant::String }, { "filename", "F", QVariant::String }, { "mimetype", "M", QVariant::String }, { "rating", "R", QVariant::Int }, { "tag", "TAG-", QVariant::String }, { "tags", "TA", QVariant::String }, { "usercomment", "C", QVariant::String } }}; std::pair propertyInfo(const QByteArray& property) { auto it = std::find_if(std::begin(internalProperties), std::end(internalProperties), [&property] (const InternalProperty& entry) { return property == entry.propertyName; }); if (it != std::end(internalProperties)) { return { (*it).prefix, (*it).valueType }; } else { KFileMetaData::PropertyInfo pi = KFileMetaData::PropertyInfo::fromName(property); if (pi.property() == KFileMetaData::Property::Empty) { return { QByteArray(), QVariant::Invalid }; } int propPrefix = static_cast(pi.property()); return { 'X' + QByteArray::number(propPrefix) + '-', pi.valueType() }; } } } SearchStore::SearchStore() : m_db(nullptr) { m_db = globalDatabaseInstance(); if (!m_db->open(Database::ReadOnlyDatabase)) { m_db = nullptr; } } SearchStore::~SearchStore() { } // Return the result with-in [offset, offset + limit) QStringList SearchStore::exec(const Term& term, uint offset, int limit, bool sortResults) { if (!m_db || !m_db->isOpen()) { return QStringList(); } Transaction tr(m_db, Transaction::ReadOnly); QScopedPointer it(constructQuery(&tr, term)); if (!it) { return QStringList(); } if (sortResults) { QVector> resultIds; while (it->next()) { quint64 id = it->docId(); quint32 mtime = tr.documentTimeInfo(id).mTime; resultIds << std::pair{id, mtime}; Q_ASSERT(id > 0); } // Not enough results within range, no need to sort. if (offset >= static_cast(resultIds.size())) { return QStringList(); } auto compFunc = [](const std::pair& lhs, const std::pair& rhs) { return lhs.second > rhs.second; }; std::sort(resultIds.begin(), resultIds.end(), compFunc); if (limit < 0) { limit = resultIds.size(); } QStringList results; const uint end = qMin(static_cast(resultIds.size()), offset + static_cast(limit)); results.reserve(end - offset); for (uint i = offset; i < end; i++) { const quint64 id = resultIds[i].first; const QString filePath = tr.documentUrl(id); results << filePath; } return results; } else { QStringList results; uint ulimit = limit < 0 ? UINT_MAX : limit; while (offset && it->next()) { offset--; } while (ulimit && it->next()) { quint64 id = it->docId(); Q_ASSERT(id > 0); results << tr.documentUrl(it->docId()); Q_ASSERT(!results.last().isEmpty()); ulimit--; } return results; } } PostingIterator* SearchStore::constructQuery(Transaction* tr, const Term& term) { Q_ASSERT(tr); if (term.operation() == Term::And || term.operation() == Term::Or) { const QList subTerms = term.subTerms(); QVector vec; vec.reserve(subTerms.size()); for (const Term& t : subTerms) { auto iterator = constructQuery(tr, t); // constructQuery returns a nullptr to signal an empty list if (iterator) { vec << iterator; } else if (term.operation() == Term::And) { return nullptr; } } if (vec.isEmpty()) { return nullptr; } else if (vec.size() == 1) { return vec.takeFirst(); } if (term.operation() == Term::And) { return new AndPostingIterator(vec); } else { return new OrPostingIterator(vec); } } if (term.value().isNull()) { return nullptr; } Q_ASSERT(term.value().isValid()); Q_ASSERT(term.comparator() != Term::Auto); Q_ASSERT(term.comparator() == Term::Contains ? term.value().type() == QVariant::String : true); const QVariant value = term.value(); const QByteArray property = term.property().toLower().toUtf8(); if (property == "type" || property == "kind") { EngineQuery q = constructTypeQuery(value.toString()); return tr->postingIterator(q); } else if (property == "includefolder") { const QFileInfo fi(value.toString()); const QByteArray folder = QFile::encodeName(fi.canonicalFilePath()); if (folder.isEmpty()) { return nullptr; } if (!folder.startsWith('/')) { return nullptr; } quint64 id = filePathToId(folder); if (!id) { qCDebug(BALOO) << "Folder" << value.toString() << "does not exist"; return nullptr; } return tr->docUrlIter(id); } else if (property == "modified" || property == "mtime") { if (value.type() == QVariant::ByteArray) { // Used by Baloo::Query QByteArray ba = value.toByteArray(); Q_ASSERT(ba.size() >= 4); int year = ba.mid(0, 4).toInt(); int month = ba.mid(4, 2).toInt(); int day = ba.mid(6, 2).toInt(); Q_ASSERT(year); // uses 0 to represent whole month or whole year month = month >= 0 && month <= 12 ? month : 0; day = day >= 0 && day <= 31 ? day : 0; QDate startDate(year, month ? month : 1, day ? day : 1); QDate endDate(startDate); if (month == 0) { endDate.setDate(endDate.year(), 12, 31); } else if (day == 0) { endDate.setDate(endDate.year(), endDate.month(), endDate.daysInMonth()); } return tr->mTimeRangeIter(QDateTime(startDate).toSecsSinceEpoch(), QDateTime(endDate, QTime(23, 59, 59)).toSecsSinceEpoch()); } else if (value.type() == QVariant::String) { const QDateTime dt = value.toDateTime(); QPair timerange = calculateTimeRange(dt, term.comparator()); if ((timerange.first == 0) && (timerange.second == 0)) { return nullptr; } return tr->mTimeRangeIter(timerange.first, timerange.second); } else { Q_ASSERT_X(0, "SearchStore::constructQuery", "modified property must contain date/datetime values"); return nullptr; } } else if (property == "tag") { if (term.comparator() == Term::Equal) { const QByteArray prefix = "TAG-"; EngineQuery q = EngineQuery(prefix + value.toByteArray()); return tr->postingIterator(q); } else if (term.comparator() == Term::Contains) { const QByteArray prefix = "TA"; EngineQuery q = constructEqualsQuery(prefix, value.toString()); return tr->postingIterator(q); } else { Q_ASSERT(0); return nullptr; } + } else if (property == "") { + Term cterm(QStringLiteral("content"), term.value(), term.comparator()); + Term fterm(QStringLiteral("filename"), term.value(), term.comparator()); + return constructQuery(tr, Term{cterm, Term::Operation::Or, fterm}); } QByteArray prefix; QVariant::Type valueType = QVariant::String; if (!property.isEmpty()) { std::tie(prefix, valueType) = propertyInfo(property); if (valueType == QVariant::Invalid) { return nullptr; } } auto com = term.comparator(); if (com == Term::Contains && valueType == QVariant::Int) { com = Term::Equal; } if (com == Term::Contains) { EngineQuery q = constructContainsQuery(prefix, value.toString()); return tr->postingIterator(q); } if (com == Term::Equal) { EngineQuery q = constructEqualsQuery(prefix, value.toString()); return tr->postingIterator(q); } PostingDB::Comparator pcom; if (com == Term::Greater || com == Term::GreaterEqual) { pcom = PostingDB::GreaterEqual; } else if (com == Term::Less || com == Term::LessEqual) { pcom = PostingDB::LessEqual; } // FIXME -- has to be kept in sync with the code from // Baloo::Result::add if (valueType == QVariant::Int) { qlonglong intVal = value.toLongLong(); if (term.comparator() == Term::Greater) { intVal++; } else if (term.comparator() == Term::Less) { intVal--; } return tr->postingCompIterator(prefix, intVal, pcom); } else if (valueType == QVariant::Double) { double dVal = value.toDouble(); return tr->postingCompIterator(prefix, dVal, pcom); } else if (valueType == QVariant::DateTime) { QDateTime dt = value.toDateTime(); const QByteArray ba = dt.toString(Qt::ISODate).toUtf8(); return tr->postingCompIterator(prefix, ba, pcom); } else { qCDebug(BALOO) << "Comparison must be with an integer"; } return nullptr; } EngineQuery SearchStore::constructContainsQuery(const QByteArray& prefix, const QString& value) { QueryParser parser; return parser.parseQuery(value, prefix); } EngineQuery SearchStore::constructEqualsQuery(const QByteArray& prefix, const QString& value) { // We use the TermGenerator to normalize the words in the value and to // split it into other words. If we split the words, we then add them as a // phrase query. const QByteArrayList terms = TermGenerator::termList(value); QVector queries; queries.reserve(terms.size()); for (const QByteArray& term : terms) { QByteArray arr = prefix + term; // FIXME - compatibility hack, to find truncated terms with old // DBs, remove on next DB bump if (arr.size() > 25) { queries << EngineQuery(arr.left(25), EngineQuery::StartsWith); } else { queries << EngineQuery(arr); } } if (queries.isEmpty()) { return EngineQuery(); } else if (queries.size() == 1) { return queries.first(); } else { return EngineQuery(queries, EngineQuery::Phrase); } } EngineQuery SearchStore::constructTypeQuery(const QString& value) { Q_ASSERT(!value.isEmpty()); KFileMetaData::TypeInfo ti = KFileMetaData::TypeInfo::fromName(value); if (ti == KFileMetaData::Type::Empty) { qCDebug(BALOO) << "Type" << value << "does not exist"; return EngineQuery(); } int num = static_cast(ti.type()); return EngineQuery('T' + QByteArray::number(num)); } } // namespace Baloo