diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -25,4 +25,5 @@
     writetransaction.cpp
     global.cpp
     fsutils.cpp
+    characterrangescjk.cpp
 )
diff --git a/src/engine/characterrangescjk.h b/src/engine/characterrangescjk.h
new file mode 100644
--- /dev/null
+++ b/src/engine/characterrangescjk.h
@@ -0,0 +1,61 @@
+/*
+ * This file is part of the KDE Baloo project.
+ * Copyright 2018 Michael Heidelbach
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License or (at your option) version 3 or any later version
+ * accepted by the membership of KDE e.V. (or its successor approved
+ * by the membership of KDE e.V.), which shall act as a proxy
+ * defined in Section 14 of version 3 of the license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CHARACTERRANGESCJK_H
+#define CHARACTERRANGESCJK_H
+
+#include <QPair>
+#include <QString>
+#include <QStringList>
+#include <QVector>
+
+namespace Baloo
+{
+
+/**
+ * This class provides tools to generate
+ * search terms for CJK languages.
+ */
+class CjkCharacters {
+public:
+    /**
+     * Scans \p text for CJK characters.
+     *
+     * \return a list of search terms with one entry per CJK
+     * character found, in order of appearance. The list is
+     * empty if \p text contains no CJK characters.
+     */
+    static const QStringList termsFromCJK(const QString& text);
+
+private:
+    /**
+     * \return \p cha as a string if it lies in one of m_ranges,
+     * otherwise an empty string.
+     */
+    static const QString getCJKCharacter(const QChar cha);
+
+    /** Inclusive {first, last} code ranges treated as CJK. */
+    static const QVector<QPair<quint16, quint16>> m_ranges;
+};
+
+}
+
+#endif // CHARACTERRANGESCJK_H
diff --git a/src/engine/characterrangescjk.cpp b/src/engine/characterrangescjk.cpp
new file mode 100644
--- /dev/null
+++ b/src/engine/characterrangescjk.cpp
@@ -0,0 +1,54 @@
+/*
+ * This file is part of the KDE Baloo project.
+ * Copyright 2018 Michael Heidelbach
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License or (at your option) version 3 or any later version
+ * accepted by the membership of KDE e.V. (or its successor approved
+ * by the membership of KDE e.V.), which shall act as a proxy
+ * defined in Section 14 of version 3 of the license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "characterrangescjk.h"
+
+using namespace Baloo;
+// Ranges taken from https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
+// FIXME: Refine and extend the ranges, and add unit tests!
+const QVector<QPair<quint16, quint16>> CjkCharacters::m_ranges = {
+    // CJK Unified Ideographs block, as inclusive {first, last} code-unit pairs
+    {0x4E00, 0x62FF}, {0x6300, 0x77FF},
+    {0x7800, 0x8CFF}, {0x8D00, 0x9FFF}
+};
+
+// Returns cha as a one-character string if it lies in m_ranges, else a null QString.
+const QString CjkCharacters::getCJKCharacter(const QChar cha) {
+    const quint16 uni = cha.unicode();
+    for (const auto& range : m_ranges) {
+        if (uni >= range.first && uni <= range.second) {
+            return cha;
+        }
+    }
+    return QString();
+}
+
+// Returns one term per code unit of text accepted by getCJKCharacter(), in order.
+const QStringList CjkCharacters::termsFromCJK(const QString& text) {
+    QStringList result;
+    for (const QChar cha : text) {
+        const QString term = getCJKCharacter(cha);
+        if (!term.isEmpty()) {
+            result << term; // duplicates are kept
+        }
+    }
+    return result;
+}
diff --git a/src/engine/queryparser.cpp b/src/engine/queryparser.cpp
--- a/src/engine/queryparser.cpp
+++ b/src/engine/queryparser.cpp
@@ -21,2 +21,3 @@
 #include "queryparser.h"
 #include "enginequery.h"
+#include "characterrangescjk.h"
@@ -158,7 +159,13 @@
         queries << phraseQueries;
         phraseQueries.clear();
     }
-
+
+    // Add one StartsWith query per CJK character found in text_
+    const QStringList cjkTerms = CjkCharacters::termsFromCJK(text_);
+    for (const auto& cjkTerm : cjkTerms) {
+        queries << EngineQuery(cjkTerm.toUtf8(), EngineQuery::StartsWith);
+    }
+
     if (queries.size() == 1) {
         return queries.first();
     }
diff --git a/src/engine/termgenerator.cpp b/src/engine/termgenerator.cpp
--- a/src/engine/termgenerator.cpp
+++ b/src/engine/termgenerator.cpp
@@ -21,2 +21,3 @@
 #include "termgenerator.h"
+#include "characterrangescjk.h"
 #include "document.h"
@@ -76,6 +77,7 @@
             }
         }
     }
-
+    // Append one term per CJK character in text_
+    list += CjkCharacters::termsFromCJK(text_);
     return list;
 }