Changeset View
Changeset View
Standalone View
Standalone View
lib/contactcompleter.cpp
Show All 18 Lines | |||||
19 | * License along with this library. If not, see <https://www.gnu.org/licenses/>. | 19 | * License along with this library. If not, see <https://www.gnu.org/licenses/>. | ||
20 | * | 20 | * | ||
21 | */ | 21 | */ | ||
22 | 22 | | |||
23 | #include <xapian.h> | 23 | #include <xapian.h> | ||
24 | 24 | | |||
25 | #include "contactcompleter.h" | 25 | #include "contactcompleter.h" | ||
26 | #include "query.h" | 26 | #include "query.h" | ||
27 | #include "akonadi_search_pim_debug.h" | ||||
27 | 28 | | |||
28 | #include <QStandardPaths> | 29 | #include <QStandardPaths> | ||
29 | #include <QDebug> | 30 | #include <QDebug> | ||
30 | #include <QFile> | 31 | #include <QFile> | ||
32 | #include <QElapsedTimer> | ||||
31 | 33 | | |||
32 | using namespace Akonadi::Search::PIM; | 34 | using namespace Akonadi::Search::PIM; | ||
33 | 35 | | |||
34 | ContactCompleter::ContactCompleter(const QString &prefix, int limit) | 36 | ContactCompleter::ContactCompleter(const QString &prefix, int limit) | ||
35 | : m_prefix(prefix.toLower()) | 37 | : m_prefix(prefix.toLower()) | ||
36 | , m_limit(limit) | 38 | , m_limit(limit) | ||
37 | { | 39 | { | ||
38 | } | 40 | } | ||
39 | 41 | | |||
42 | static QStringList processEnquire(Xapian::Enquire &enq, int limit) | ||||
43 | { | ||||
44 | QElapsedTimer timer; | ||||
45 | timer.start(); | ||||
46 | | ||||
47 | // Retrieves no results but provides statistics - it's very quick | ||||
48 | auto statsmset = enq.get_mset(0, 0); | ||||
49 | qCDebug(AKONADI_SEARCH_PIM_LOG) << "Query:" << QString::fromStdString(enq.get_query().get_description()); | ||||
50 | qCDebug(AKONADI_SEARCH_PIM_LOG) << "Estimated matches:" << statsmset.get_matches_estimated(); | ||||
51 | const int matchEstimate = statsmset.get_matches_estimated(); | ||||
52 | | ||||
53 | QStringList list; | ||||
54 | list.reserve(std::min(limit, matchEstimate)); | ||||
55 | int duplicates = 0; | ||||
56 | int firstItem = 0; | ||||
57 | // We run the query multiple times, since we may discard some results as duplicates. | ||||
58 | while (list.size() < limit) { | ||||
59 | // Always query the "limit"-count of results: | ||||
60 | // * if estimate is less than limit, we make sure we don't miss results any due to wrong estimate | ||||
61 | // * if estimate is more than limit, we don't want to query more documents than needed | ||||
62 | Xapian::MSet mset = enq.get_mset(firstItem, limit); | ||||
63 | if (mset.empty()) { // there are no more non-duplicate results | ||||
64 | break; | ||||
65 | } | ||||
66 | | ||||
67 | for (auto it = mset.begin(), end = mset.end(); it != end && list.size() < limit; ++it) { | ||||
68 | const auto entry = QString::fromStdString(it.get_document().get_data()); | ||||
69 | // TODO: Be smarter about the deduplication by fixing the indexing code: | ||||
70 | // If we store mailbox name and address as separate named terms then we could deduplicate | ||||
71 | // purely based on the email address. | ||||
72 | if (!list.contains(entry, Qt::CaseInsensitive)) { | ||||
73 | qCDebug(AKONADI_SEARCH_PIM_LOG, "Match: \"%s\" (%d%%), docid %u", qUtf8Printable(entry), it.get_percent(), *it); | ||||
74 | list.push_back(entry); | ||||
75 | } else { | ||||
76 | ++duplicates; | ||||
77 | qCDebug(AKONADI_SEARCH_PIM_LOG, "Skipped duplicate match \"%s\" (%d%%) docid %u", qUtf8Printable(entry), it.get_percent(), *it); | ||||
78 | } | ||||
79 | ++firstItem; | ||||
80 | } | ||||
81 | } | ||||
82 | | ||||
83 | qCDebug(AKONADI_SEARCH_PIM_LOG) << "Collected" << list.size() << "results in" << timer.elapsed() << "ms, skipped" << duplicates << "duplicates."; | ||||
84 | return list; | ||||
85 | } | ||||
86 | | ||||
40 | QStringList ContactCompleter::complete() | 87 | QStringList ContactCompleter::complete() | ||
41 | { | 88 | { | ||
42 | const QString dir = Query::defaultLocation(QStringLiteral("emailContacts")); | 89 | const QString dir = Query::defaultLocation(QStringLiteral("emailContacts")); | ||
43 | Xapian::Database db; | 90 | Xapian::Database db; | ||
44 | try { | 91 | try { | ||
45 | db = Xapian::Database(QFile::encodeName(dir).constData()); | 92 | db = Xapian::Database(QFile::encodeName(dir).constData()); | ||
46 | } catch (const Xapian::DatabaseOpeningError &) { | 93 | } catch (const Xapian::DatabaseOpeningError &) { | ||
47 | qWarning() << "Xapian Database does not exist at " << dir; | 94 | qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database does not exist at " << dir; | ||
48 | return QStringList(); | 95 | return QStringList(); | ||
49 | } catch (const Xapian::DatabaseCorruptError &) { | 96 | } catch (const Xapian::DatabaseCorruptError &) { | ||
50 | qWarning() << "Xapian Database corrupted"; | 97 | qCWarning(AKONADI_SEARCH_PIM_LOG) << "Xapian Database corrupted"; | ||
51 | return QStringList(); | 98 | return QStringList(); | ||
52 | } catch (const Xapian::DatabaseError &e) { | 99 | } catch (const Xapian::DatabaseError &e) { | ||
53 | qWarning() << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description()); | 100 | qCWarning(AKONADI_SEARCH_PIM_LOG) << QString::fromStdString(e.get_type()) << QString::fromStdString(e.get_description()); | ||
54 | return QStringList(); | 101 | return QStringList(); | ||
55 | } catch (...) { | 102 | } catch (...) { | ||
56 | qWarning() << "Random exception, but we do not want to crash"; | 103 | qCWarning(AKONADI_SEARCH_PIM_LOG) << "Random exception, but we do not want to crash"; | ||
57 | return QStringList(); | 104 | return QStringList(); | ||
58 | } | 105 | } | ||
59 | 106 | | |||
60 | Xapian::QueryParser parser; | 107 | Xapian::QueryParser parser; | ||
61 | parser.set_database(db); | 108 | parser.set_database(db); | ||
62 | 109 | | |||
63 | std::string prefix(m_prefix.toUtf8().constData()); | 110 | const int flags = Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_PARTIAL; | ||
64 | int flags = Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_PARTIAL; | 111 | const Xapian::Query q = parser.parse_query(m_prefix.toStdString(), flags); | ||
65 | Xapian::Query q = parser.parse_query(prefix, flags); | | |||
66 | 112 | | |||
67 | Xapian::Enquire enq(db); | 113 | Xapian::Enquire enq(db); | ||
68 | enq.set_query(q); | 114 | enq.set_query(q); | ||
115 | enq.set_sort_by_relevance(); | ||||
116 | // TODO: extend the indexer to use value slots for the normalized email address so that | ||||
117 | // duplicates can be collapsed by Xapian::Enquire::set_collapse_key() | ||||
69 | 118 | | |||
70 | Xapian::MSet mset = enq.get_mset(0, m_limit); | | |||
71 | Xapian::MSetIterator mit = mset.begin(); | | |||
72 | | ||||
73 | QStringList list; | | |||
74 | Xapian::MSetIterator end = mset.end(); | | |||
75 | list.reserve(mset.size()); | | |||
76 | Q_FOREVER { | 119 | Q_FOREVER { | ||
77 | try { | 120 | try { | ||
78 | for (; mit != end; ++mit) { | 121 | return processEnquire(enq, m_limit); | ||
79 | std::string str = mit.get_document().get_data(); | | |||
80 | const QString entry = QString::fromUtf8(str.c_str(), str.length()); | | |||
81 | list << entry; | | |||
82 | } | | |||
83 | return list; | | |||
84 | } catch (const Xapian::DatabaseCorruptError &e) { | 122 | } catch (const Xapian::DatabaseCorruptError &e) { | ||
85 | qWarning() << "The emailContacts Xapian database is corrupted:" << QString::fromStdString(e.get_description()); | 123 | qCWarning(AKONADI_SEARCH_PIM_LOG) << "The emailContacts Xapian database is corrupted:" << QString::fromStdString(e.get_description()); | ||
86 | return QStringList(); | 124 | return QStringList(); | ||
87 | } catch (const Xapian::DatabaseModifiedError &e) { | 125 | } catch (const Xapian::DatabaseModifiedError &e) { | ||
88 | db.reopen(); | 126 | db.reopen(); | ||
89 | continue; // try again | 127 | continue; // try again | ||
90 | } | 128 | } | ||
91 | } | 129 | } | ||
92 | } | 130 | } |