diff --git a/plugins/messageviewer/bodypartformatter/autotests/unstructureddataextractortest.cpp b/plugins/messageviewer/bodypartformatter/autotests/unstructureddataextractortest.cpp
index a26d0966..a3e8289e 100644
--- a/plugins/messageviewer/bodypartformatter/autotests/unstructureddataextractortest.cpp
+++ b/plugins/messageviewer/bodypartformatter/autotests/unstructureddataextractortest.cpp
@@ -1,140 +1,142 @@
 /*
    Copyright (c) 2017 Volker Krause
 
    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
 
    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 */
 
 #include "extractor.h"
 #include "extractorengine.h"
 #include "extractorpreprocessor.h"
 
 #include
 #include
 #include
 #include
 #include
 #include
 
 class UnstructuredDataExtractorTest : public QObject
 {
     Q_OBJECT
 private Q_SLOTS:
-    void init()
+    void initTestCase()
     {
         Q_INIT_RESOURCE(rules);
+        // use some exotic locale to ensure the date/time parsing doesn't just work by luck
+        QLocale::setDefault(QLocale(QStringLiteral("fr_FR")));
     }
 
     void testExtractText_data()
     {
         QTest::addColumn("inputFile");
         QTest::addColumn("extractorName");
         QTest::addColumn("jsonFile");
 
         QDir dir(QStringLiteral(SOURCE_DIR "/unstructureddata"));
         const auto lst = dir.entryList(QStringList(QStringLiteral("*.txt")), QDir::Files | QDir::Readable | QDir::NoSymLinks);
         for (const auto &file : lst) {
             const auto refFile = dir.path() + QLatin1Char('/') + file.left(file.size() - 4) + QStringLiteral(".json");
             if (!QFile::exists(refFile)) {
                 qDebug() << "reference file" << refFile << "does not exist, skipping test file" << file;
                 continue;
             }
             const auto idx = file.indexOf(QLatin1Char('_'));
             QTest::newRow(file.toLatin1()) << QString(dir.path() + QLatin1Char('/') + file) << file.left(idx) << refFile;
         }
     }
 
     void testExtractText()
     {
         QFETCH(QString, inputFile);
         QFETCH(QString, extractorName);
         QFETCH(QString, jsonFile);
 
         QFile f(inputFile);
         QVERIFY(f.open(QFile::ReadOnly));
 
         Extractor extractor;
         QVERIFY(extractor.load(QLatin1String(":/org.kde.messageviewer/semantic/rules/") + extractorName + QLatin1String(".xml")));
 
         ExtractorEngine engine;
         engine.setText(QString::fromUtf8(f.readAll()));
         engine.setExtractor(&extractor);
         const auto data = engine.extract();
 
         QFile ref(jsonFile);
         QVERIFY(ref.open(QFile::ReadOnly));
         const auto doc = QJsonDocument::fromJson(ref.readAll());
         QVERIFY(doc.isArray());
 
         if (data != doc.array()) {
             qDebug().noquote() << QJsonDocument(data).toJson();
         }
         QCOMPARE(data, doc.array());
     }
 
     void testExtractHtml_data()
     {
         QTest::addColumn("inputFile");
         QTest::addColumn("extractorName");
         QTest::addColumn("jsonFile");
 
         QDir dir(QStringLiteral(SOURCE_DIR "/unstructureddata"));
         const auto lst = dir.entryList(QStringList(QStringLiteral("*.html")), QDir::Files | QDir::Readable | QDir::NoSymLinks);
         for (const auto &file : lst) {
             const auto refFile = dir.path() + QLatin1Char('/') + file.left(file.size() - 5) + QStringLiteral(".json");
             if (!QFile::exists(refFile)) {
                 qDebug() << "reference file" << refFile << "does not exist, skipping test file" << file;
                 continue;
             }
             const auto idx = file.indexOf(QLatin1Char('_'));
             QTest::newRow(file.toLatin1()) << QString(dir.path() + QLatin1Char('/') + file) << file.left(idx) << refFile;
         }
     }
 
     void testExtractHtml()
     {
         QFETCH(QString, inputFile);
         QFETCH(QString, extractorName);
         QFETCH(QString, jsonFile);
 
         QFile f(inputFile);
         QVERIFY(f.open(QFile::ReadOnly));
 
         Extractor extractor;
         QVERIFY(extractor.load(QLatin1String(":/org.kde.messageviewer/semantic/rules/") + extractorName + QLatin1String(".xml")));
 
         ExtractorPreprocessor preproc;
         preproc.preprocessHtml(QString::fromUtf8(f.readAll()));
 
         ExtractorEngine engine;
         engine.setText(preproc.text());
         engine.setExtractor(&extractor);
         const auto data = engine.extract();
 
         QFile ref(jsonFile);
         QVERIFY(ref.open(QFile::ReadOnly));
         const auto doc = QJsonDocument::fromJson(ref.readAll());
         QVERIFY(doc.isArray());
 
         if (data != doc.array()) {
             qDebug().noquote() << QJsonDocument(data).toJson();
         }
         QCOMPARE(data, doc.array());
     }
 };
 
 QTEST_APPLESS_MAIN(UnstructuredDataExtractorTest)
 
 #include "unstructureddataextractortest.moc"
diff --git a/plugins/messageviewer/bodypartformatter/semantic/extractorrule.cpp b/plugins/messageviewer/bodypartformatter/semantic/extractorrule.cpp
index 90f79ca7..d172f9d7 100644
--- a/plugins/messageviewer/bodypartformatter/semantic/extractorrule.cpp
+++ b/plugins/messageviewer/bodypartformatter/semantic/extractorrule.cpp
@@ -1,174 +1,206 @@
 /*
    Copyright (c) 2017 Volker Krause
 
    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
 
    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 */
 
 #include "extractorrule.h"
 #include "extractorcontext.h"
 #include "extractorengine.h"
 #include "semantic_debug.h"
 
 #include
 #include
 
 #include
 
 ExtractorRule::~ExtractorRule() = default;
 
 QString ExtractorRule::name() const
 {
     return m_name;
 }
 
 QString ExtractorRule::type() const
 {
     return m_type;
 }
 
 bool ExtractorRule::repeats() const
 {
     return m_repeat;
 }
 
+/**
+ * Expands the "${...}" placeholders in the rule's value template.
+ *
+ * A numeric placeholder name is replaced by that capture group of @p match, any other
+ * name by the value of the equally named variable of @p context. The result is trimmed.
+ * Substituted text is not scanned again, so placeholder-like sequences in captured message
+ * text are copied verbatim instead of being expanded (which could loop forever).
+ */
 QString ExtractorRule::value(const QRegularExpressionMatch &match, ExtractorContext *context) const
 {
     auto v = m_value;
+    int pos = 0; // where to look for the next placeholder
     while (true) {
-        const auto begin = v.indexOf(QLatin1String("${"));
+        const auto begin = v.indexOf(QLatin1String("${"), pos);
         if (begin < 0) {
             break;
         }
-        const auto end = v.indexOf(QLatin1Char('}'), begin + 3);
+        // search right behind "${" so that an empty "${}" is closed by its own brace
+        const auto end = v.indexOf(QLatin1Char('}'), begin + 2);
+        if (end < 0) {
+            // no closing brace: keep the remainder verbatim rather than replacing a negative-length range
+            qCWarning(SEMANTIC_LOG) << "unterminated placeholder in value of rule" << m_name << m_value;
+            break;
+        }
         const auto varName = v.mid(begin + 2, end - begin - 2);
         bool isNum = false;
         const auto captureIdx = varName.toInt(&isNum);
+        QString replacement;
         if (isNum) {
-            v.replace(begin, end - begin + 1, match.captured(captureIdx));
+            replacement = match.captured(captureIdx);
         } else {
-            v.replace(begin, end - begin + 1, context->variableValue(varName));
+            replacement = context->variableValue(varName);
         }
+        v.replace(begin, end - begin + 1, replacement);
+        pos = begin + replacement.size(); // continue behind the inserted text
     }
     return v.trimmed();
 }
 
 QString ExtractorRule::format() const
 {
     return m_format;
 }
 
+QLocale ExtractorRule::locale() const
+{
+    return m_locale;
+}
+
 bool ExtractorRule::load(QXmlStreamReader &reader)
 {
     m_name = reader.attributes().value(QLatin1String("name")).toString();
     m_type = reader.attributes().value(QLatin1String("type")).toString();
     m_value = reader.attributes().value(QLatin1String("value")).toString();
     m_format = reader.attributes().value(QLatin1String("format")).toString();
     m_repeat = reader.attributes().value(QLatin1String("repeat")) == QLatin1String("true");
     m_regexp.setPattern(reader.attributes().value(QLatin1String("match")).toString());
     if (!m_regexp.isValid()) {
         qCWarning(SEMANTIC_LOG) << m_regexp.errorString() << m_regexp.pattern() << "at offset" << m_regexp.patternErrorOffset();
     }
+    // an explicit "locale" attribute overrides the default locale for parsing this rule's values
+    if (reader.attributes().hasAttribute(QLatin1String("locale"))) {
+        m_locale = QLocale(reader.attributes().value(QLatin1String("locale")).toString());
+    }
     return true;
 }
 
 bool ExtractorVariableRule::match(ExtractorContext *context) const
 {
     const auto res = m_regexp.match(context->engine()->text(), context->offset());
     if (res.hasMatch()) {
         qCDebug(SEMANTIC_LOG) << name() << res.captured() << context->offset() << res.capturedEnd() << context->engine()->text().mid(context->offset(), 20);
         context->setVariable(name(), value(res, context));
         context->setOffset(res.capturedEnd());
     }
     return res.hasMatch();
 }
 
 ExtractorClassRule::~ExtractorClassRule()
 {
     qDeleteAll(m_rules);
 }
 
 bool ExtractorClassRule::load(QXmlStreamReader &reader)
 {
     if (!ExtractorRule::load(reader)) {
         return false;
     }
 
     while (!reader.atEnd()) {
         reader.readNext();
         if (reader.tokenType() == QXmlStreamReader::EndElement) {
             return true;
         }
 
         if (reader.tokenType() != QXmlStreamReader::StartElement) {
             continue;
         }
 
         std::unique_ptr rule;
         if (reader.name() == QLatin1String("variable")) {
             rule.reset(new ExtractorVariableRule);
             if (!rule->load(reader)) {
                 return false;
             }
             reader.skipCurrentElement();
         } else if (reader.name() == QLatin1String("class")) {
             rule.reset(new ExtractorClassRule);
             if (!rule->load(reader)) {
                 return false;
             }
         } else if (reader.name() == QLatin1String("property")) {
             rule.reset(new ExtractorPropertyRule);
             if (!rule->load(reader)) {
                 return false;
             }
             reader.skipCurrentElement();
         } else {
             return false;
         }
 
         m_rules.push_back(rule.release());
     }
 
     return true;
 }
 
 bool ExtractorClassRule::match(ExtractorContext *context) const
 {
     const auto res = m_regexp.match(context->engine()->text(), context->offset());
     if (res.hasMatch()) {
         context->setOffset(res.capturedEnd());
     }
     return res.hasMatch();
 }
 
 QVector ExtractorClassRule::rules() const
 {
     return m_rules;
 }
 
 bool ExtractorPropertyRule::match(ExtractorContext *context) const
 {
     const auto res = m_regexp.match(context->engine()->text(), context->offset());
     if (res.hasMatch()) {
         auto val = value(res, context);
         if (type() == QLatin1String("dateTime") && !format().isEmpty()) {
-            const auto dt = QDateTime::fromString(val, format());
+            // parse with the rule's locale so that localized month/day names are recognized
+            const auto dt = locale().toDateTime(val, format());
+            if (!dt.isValid()) {
+                // the property is still set (to an empty string), but make the failure visible
+                qCWarning(SEMANTIC_LOG) << "failed to parse date/time" << val << "with format" << format() << "and locale" << locale().name();
+            }
             val = dt.toString(Qt::ISODate);
         }
         context->setProperty(name(), val);
         context->setOffset(res.capturedEnd());
     }
     return res.hasMatch();
 }
diff --git a/plugins/messageviewer/bodypartformatter/semantic/extractorrule.h b/plugins/messageviewer/bodypartformatter/semantic/extractorrule.h
index add2e0b8..6086f848 100644
--- a/plugins/messageviewer/bodypartformatter/semantic/extractorrule.h
+++ b/plugins/messageviewer/bodypartformatter/semantic/extractorrule.h
@@ -1,79 +1,83 @@
 /*
    Copyright (c) 2017 Volker Krause
 
    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
 
    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 */
 
 #ifndef EXTRACTORRULE_H
 #define EXTRACTORRULE_H
 
+#include
 #include
 #include
 #include
 
 class ExtractorContext;
 class QXmlStreamReader;
 
 /** A single unstructured data extractor rule. */
 class ExtractorRule
 {
 public:
     virtual ~ExtractorRule();
     virtual bool load(QXmlStreamReader &reader);
     virtual bool match(ExtractorContext *context) const = 0;
 
     QString name() const;
     QString type() const;
     bool repeats() const;
 
 protected:
     QString value(const QRegularExpressionMatch &match, ExtractorContext *context) const;
     QString format() const;
+    /** Locale used to parse localized property values; the default locale unless the rule sets a "locale" attribute. */
+    QLocale locale() const;
 
     QRegularExpression m_regexp;
 private:
     QString m_name;
     QString m_type;
     QString m_value;
     QString m_format;
+    QLocale m_locale;
     bool m_repeat = false;
 };
 
 class ExtractorVariableRule : public ExtractorRule
 {
 public:
     bool match(ExtractorContext *context) const override;
 };
 
 class ExtractorClassRule : public ExtractorRule
 {
 public:
-    ~ExtractorClassRule();
+    ~ExtractorClassRule() override;
     bool load(QXmlStreamReader &reader) override;
     bool match(ExtractorContext *context) const override;
     QVector rules() const;
 
 private:
     QVector m_rules;
 };
 
 class ExtractorPropertyRule : public ExtractorRule
 {
 public:
     bool match(ExtractorContext *context) const override;
 };
 
 #endif // EXTRACTORRULE_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/rules/amadeus.xml b/plugins/messageviewer/bodypartformatter/semantic/rules/amadeus.xml
index 572d6778..58c15346 100644
--- a/plugins/messageviewer/bodypartformatter/semantic/rules/amadeus.xml
+++ b/plugins/messageviewer/bodypartformatter/semantic/rules/amadeus.xml
@@ -1,26 +1,26 @@
-
+
-
+
diff --git a/plugins/messageviewer/bodypartformatter/semantic/rules/brusselsairlines.xml b/plugins/messageviewer/bodypartformatter/semantic/rules/brusselsairlines.xml
index 53a9a268..f494faa8 100644
--- a/plugins/messageviewer/bodypartformatter/semantic/rules/brusselsairlines.xml
+++ b/plugins/messageviewer/bodypartformatter/semantic/rules/brusselsairlines.xml
@@ -1,26 +1,26 @@
-
+
-
+
diff --git a/plugins/messageviewer/bodypartformatter/semantic/rules/eurowings.xml b/plugins/messageviewer/bodypartformatter/semantic/rules/eurowings.xml
index cd3237cf..d3e14f28 100644
--- a/plugins/messageviewer/bodypartformatter/semantic/rules/eurowings.xml
+++ b/plugins/messageviewer/bodypartformatter/semantic/rules/eurowings.xml
@@ -1,26 +1,26 @@
-
+
-
+
diff --git a/plugins/messageviewer/bodypartformatter/semantic/rules/fcmtravel.xml b/plugins/messageviewer/bodypartformatter/semantic/rules/fcmtravel.xml
index f14bf8a4..b151ddb0 100644
--- a/plugins/messageviewer/bodypartformatter/semantic/rules/fcmtravel.xml
+++ b/plugins/messageviewer/bodypartformatter/semantic/rules/fcmtravel.xml
@@ -1,30 +1,30 @@
-
+
-
+