QueryParser: treat non-space delimiters such as '_' as phrase separators

Review notes (free text ahead of the first "diff --git" header, not patch content):
- parseQuery() now takes the delimiter between two words from the caller's
  text_ instead of from the local copy in which every '_' was replaced by ' '.
  The replacement is one character for one character, so offsets into both
  strings line up. The tests expect "foo_bar.png" and "The_Fire" to come back
  as Phrase queries (testUnderscorePhrase, testUnderscoreSplitting).
- inPhrase is now set only when a preceding term was actually moved into
  phraseQueries; the test still expects "_Fire" to yield one StartsWith term.
- NOTE(review): this copy had lost its line breaks and all text inside <...>.
  Line structure is restored below and QVector<EngineQuery> is re-derived from
  how the vectors are used. The header names after the bare #include
  directives and the copyright e-mail could not be recovered and are left as
  found. Blank context lines were re-inserted by hand - re-check the hunk
  line counts against the target files before applying.

diff --git a/autotests/unit/engine/queryparsertest.cpp b/autotests/unit/engine/queryparsertest.cpp
index 145f0d84..a78eebc6 100644
--- a/autotests/unit/engine/queryparsertest.cpp
+++ b/autotests/unit/engine/queryparsertest.cpp
@@ -1,235 +1,251 @@
 /*
  * This file is part of the KDE Baloo Project
  * Copyright (C) 2014-2015 Vishesh Handa
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  *
  */
 
 #include "queryparser.h"
 #include "enginequery.h"
 
 #include
 #include
 
 using namespace Baloo;
 
 class QueryParserTest : public QObject
 {
     Q_OBJECT
 private Q_SLOTS:
     void testSinglePrefixWord();
     void testSimpleQuery();
     void testPhraseSearch();
     void testPhraseSearchOnly();
+    void testUnderscorePhrase();
     void testPhraseSearch_sameLimiter();
     void testPhraseSearchEmail();
     void testAccentSearch();
     void testUnderscoreSplitting();
     void testAutoExpand();
     void testUnicodeLowering();
 };
 
 void QueryParserTest::testSinglePrefixWord()
 {
     QueryParser parser;
     parser.setAutoExapandSize(1);
 
     EngineQuery query = parser.parseQuery("The", "F");
     EngineQuery q("Fthe", EngineQuery::StartsWith, 1);
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testSimpleQuery()
 {
     QueryParser parser;
     parser.setAutoExapandSize(1);
 
     EngineQuery query = parser.parseQuery("The song of Ice and Fire");
 
     QVector<EngineQuery> queries;
     queries << EngineQuery("the", EngineQuery::StartsWith, 1);
     queries << EngineQuery("song", EngineQuery::StartsWith, 2);
     queries << EngineQuery("of", EngineQuery::StartsWith, 3);
     queries << EngineQuery("ice", EngineQuery::StartsWith, 4);
     queries << EngineQuery("and", EngineQuery::StartsWith, 5);
     queries << EngineQuery("fire", EngineQuery::StartsWith, 6);
 
     EngineQuery q(queries, EngineQuery::And);
 
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testPhraseSearch()
 {
     QueryParser parser;
 
     EngineQuery query = parser.parseQuery("The \"song of Ice\" Fire");
 
     QVector<EngineQuery> phraseQueries;
     phraseQueries << EngineQuery("song", 2);
     phraseQueries << EngineQuery("of", 3);
     phraseQueries << EngineQuery("ice", 4);
 
     QVector<EngineQuery> queries;
     queries << EngineQuery("the", EngineQuery::StartsWith, 1);
     queries << EngineQuery(phraseQueries, EngineQuery::Phrase);
     queries << EngineQuery("fire", EngineQuery::StartsWith, 5);
 
     EngineQuery q(queries, EngineQuery::And);
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testPhraseSearchOnly()
 {
     QueryParser parser;
 
     EngineQuery query = parser.parseQuery("/opt/pro");
 
     QVector<EngineQuery> queries;
     queries << EngineQuery("opt", 1);
     queries << EngineQuery("pro", 2);
 
     EngineQuery q(queries, EngineQuery::Phrase);
     QCOMPARE(query, q);
 }
 
+void QueryParserTest::testUnderscorePhrase()
+{
+    QueryParser parser;
+
+    EngineQuery query = parser.parseQuery("foo_bar.png");
+
+    QVector<EngineQuery> queries;
+    queries << EngineQuery("foo", 1);
+    queries << EngineQuery("bar", 2);
+    queries << EngineQuery("png", 3);
+
+    EngineQuery q(queries, EngineQuery::Phrase);
+    QCOMPARE(query, q);
+}
+
 void QueryParserTest::testPhraseSearch_sameLimiter()
 {
     QueryParser parser;
     parser.setAutoExapandSize(1);
 
     EngineQuery query = parser.parseQuery("The \"song of Ice' and Fire");
 
     QVector<EngineQuery> queries;
     queries << EngineQuery("the", EngineQuery::StartsWith, 1);
     queries << EngineQuery("song", EngineQuery::StartsWith, 2);
     queries << EngineQuery("of", EngineQuery::StartsWith, 3);
     queries << EngineQuery("ice", EngineQuery::StartsWith, 4);
     queries << EngineQuery("and", EngineQuery::StartsWith, 5);
     queries << EngineQuery("fire", EngineQuery::StartsWith, 6);
 
     EngineQuery q(queries, EngineQuery::And);
 
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testPhraseSearchEmail()
 {
     QueryParser parser;
 
     EngineQuery query = parser.parseQuery("The song@ice.com Fire");
 
     QVector<EngineQuery> phraseQueries;
     phraseQueries << EngineQuery("song", 2);
     phraseQueries << EngineQuery("ice", 3);
     phraseQueries << EngineQuery("com", 4);
 
     QVector<EngineQuery> queries;
     queries << EngineQuery("the", EngineQuery::StartsWith, 1);
     queries << EngineQuery(phraseQueries, EngineQuery::Phrase);
     queries << EngineQuery("fire", EngineQuery::StartsWith, 5);
 
     EngineQuery q(queries, EngineQuery::And);
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testAccentSearch()
 {
     QueryParser parser;
 
     EngineQuery query = parser.parseQuery(QString::fromUtf8("s\xC3\xB3ng")); // sóng
     EngineQuery q("song", EngineQuery::StartsWith, 1);
 
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testUnderscoreSplitting()
 {
     QueryParser parser;
 
     EngineQuery query = parser.parseQuery("The_Fire");
 
     QVector<EngineQuery> queries;
-    queries << EngineQuery("the", EngineQuery::StartsWith, 1);
-    queries << EngineQuery("fire", EngineQuery::StartsWith, 2);
+    queries << EngineQuery("the", 1);
+    queries << EngineQuery("fire", 2);
 
-    EngineQuery q(queries, EngineQuery::And);
+    EngineQuery q(queries, EngineQuery::Phrase);
     QCOMPARE(query, q);
 
     query = parser.parseQuery("_Fire");
     q = EngineQuery("fire", EngineQuery::StartsWith, 1);
     QCOMPARE(query, q);
 }
 
 void QueryParserTest::testAutoExpand()
 {
     QueryParser parser;
     parser.setAutoExapandSize(0);
 
     {
         EngineQuery query = parser.parseQuery("the fire");
 
         QVector<EngineQuery> queries;
         queries << EngineQuery("the", EngineQuery::Equal, 1);
         queries << EngineQuery("fire", EngineQuery::Equal, 2);
 
         EngineQuery q(queries, EngineQuery::And);
         QCOMPARE(query, q);
     }
 
     {
         EngineQuery query = parser.parseQuery("'the fire");
 
         QVector<EngineQuery> queries;
         queries << EngineQuery("the", EngineQuery::Equal, 1);
         queries << EngineQuery("fire", EngineQuery::Equal, 2);
 
         EngineQuery q(queries, EngineQuery::And);
         QCOMPARE(query, q);
     }
 
     parser.setAutoExapandSize(4);
     {
         EngineQuery query = parser.parseQuery("the fire");
 
         QVector<EngineQuery> queries;
         queries << EngineQuery("the", EngineQuery::Equal, 1);
         queries << EngineQuery("fire", EngineQuery::StartsWith, 2);
 
         EngineQuery q(queries, EngineQuery::And);
         QCOMPARE(query, q);
     }
 }
 
 void QueryParserTest::testUnicodeLowering()
 {
     // This string is unicode mathematical italic "Hedge"
     QString str = QString::fromUtf8("\xF0\x9D\x90\xBB\xF0\x9D\x91\x92\xF0\x9D\x91\x91\xF0\x9D\x91\x94\xF0\x9D\x91\x92");
 
     QueryParser parser;
     EngineQuery query = parser.parseQuery(str);
     EngineQuery expected = EngineQuery("hedge", EngineQuery::StartsWith, 1);
     QCOMPARE(query, expected);
 }
 
 QTEST_MAIN(QueryParserTest)
 
 #include "queryparsertest.moc"
diff --git a/src/engine/queryparser.cpp b/src/engine/queryparser.cpp
index 9156a662..56716e67 100644
--- a/src/engine/queryparser.cpp
+++ b/src/engine/queryparser.cpp
@@ -1,171 +1,171 @@
 /*
  * This file is part of the KDE Baloo Project
  * Copyright (C) 2014-2015 Vishesh Handa
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  *
  */
 
 #include "queryparser.h"
 #include "enginequery.h"
 
 #include
 #include
 #include
 
 using namespace Baloo;
 
 QueryParser::QueryParser()
     : m_autoExpandSize(3)
 {
 }
 
 namespace {
     bool containsSpace(const QString& string) {
         Q_FOREACH (const QChar& ch, string) {
             if (ch.isSpace())
                 return true;
         }
 
         return false;
     }
 }
 
 EngineQuery QueryParser::parseQuery(const QString& text_, const QString& prefix)
 {
     Q_ASSERT(!text_.isEmpty());
 
     QString text(text_);
     text.replace(QLatin1Char('_'), QLatin1Char(' '));
 
     QVector<EngineQuery> queries;
     QVector<EngineQuery> phraseQueries;
     int start = 0;
     int end = 0;
     int position = 0;
 
     bool inDoubleQuotes = false;
     bool inSingleQuotes = false;
     bool inPhrase = false;
 
     QTextBoundaryFinder bf(QTextBoundaryFinder::Word, text);
     for (; bf.position() != -1; bf.toNextBoundary()) {
         if (bf.boundaryReasons() & QTextBoundaryFinder::StartOfItem) {
             //
             // Check the previous delimiter
             int pos = bf.position();
             if (pos != end) {
-                QString delim = text.mid(end, pos-end);
+                QString delim = text_.mid(end, pos-end);
                 if (delim.contains(QLatin1Char('"'))) {
                     if (inDoubleQuotes) {
                         queries << EngineQuery(phraseQueries, EngineQuery::Phrase);
                         phraseQueries.clear();
                         inDoubleQuotes = false;
                     }
                     else {
                         inDoubleQuotes = true;
                     }
                 }
                 else if (delim.contains(QLatin1Char('\''))) {
                     if (inSingleQuotes) {
                         queries << EngineQuery(phraseQueries, EngineQuery::Phrase);
                         phraseQueries.clear();
                         inSingleQuotes = false;
                     }
                     else {
                         inSingleQuotes = true;
                     }
                 }
                 else if (!containsSpace(delim)) {
                     if (!inPhrase && !queries.isEmpty()) {
                         EngineQuery q = queries.takeLast();
                         q.setOp(EngineQuery::Equal);
                         phraseQueries << q;
+                        inPhrase = true;
                     }
-                    inPhrase = true;
                 }
                 else if (inPhrase && !phraseQueries.isEmpty()) {
                     queries << EngineQuery(phraseQueries, EngineQuery::Phrase);
                     phraseQueries.clear();
                     inPhrase = false;
                 }
             }
 
             start = bf.position();
             continue;
         }
         else if (bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
             end = bf.position();
 
             QString str = text.mid(start, end - start);
 
             // Remove all accents and lower it
             const QString denormalized = str.normalized(QString::NormalizationForm_KD).toLower();
 
             QString cleanString;
             Q_FOREACH (const QChar& ch, denormalized) {
                 auto cat = ch.category();
                 if (cat != QChar::Mark_NonSpacing && cat != QChar::Mark_SpacingCombining && cat != QChar::Mark_Enclosing) {
                     cleanString.append(ch);
                 }
             }
 
             str = cleanString.normalized(QString::NormalizationForm_KC);
 
             const QString term = prefix + str;
             const QByteArray arr = term.toUtf8();
 
             position++;
             if (inDoubleQuotes || inSingleQuotes || inPhrase) {
                 phraseQueries << EngineQuery(arr, position);
             }
             else {
                 if (m_autoExpandSize && arr.size() >= m_autoExpandSize) {
                     queries << EngineQuery(arr, EngineQuery::StartsWith, position);
                 } else {
                     queries << EngineQuery(arr, position);
                 }
             }
         }
     }
 
     if (inPhrase) {
         queries << EngineQuery(phraseQueries, EngineQuery::Phrase);
         phraseQueries.clear();
         inPhrase = false;
     }
 
     if (!phraseQueries.isEmpty()) {
         for (EngineQuery& q : phraseQueries) {
             if (m_autoExpandSize && q.term().size() >= m_autoExpandSize) {
                 q.setOp(EngineQuery::StartsWith);
             } else {
                 q.setOp(EngineQuery::Equal);
             }
         }
         queries << phraseQueries;
         phraseQueries.clear();
     }
 
     if (queries.size() == 1) {
         return queries.first();
     }
     return EngineQuery(queries, EngineQuery::And);
 }
 
 void QueryParser::setAutoExapandSize(int size)
 {
     m_autoExpandSize = size;
 }