diff --git a/src/extractorrepository.cpp b/src/extractorrepository.cpp index 2e32d9a..60a8946 100644 --- a/src/extractorrepository.cpp +++ b/src/extractorrepository.cpp @@ -1,189 +1,225 @@ /* Copyright (c) 2017 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "extractorrepository.h" #include "extractor.h" #include "extractorfilter.h" #include "logging.h" #include #include #include #include #include #include #include using namespace KItinerary; static void initResources() // must be outside of a namespace { Q_INIT_RESOURCE(extractors); } namespace KItinerary { class ExtractorRepositoryPrivate { public: ExtractorRepositoryPrivate(); void loadExtractors(); std::vector m_extractors; Extractor m_genericHtmlExtractor; Extractor m_genericPdfExtractor; Extractor m_genericPkPassExtractor; }; } ExtractorRepositoryPrivate::ExtractorRepositoryPrivate() { initResources(); loadExtractors(); } ExtractorRepository::ExtractorRepository() { static ExtractorRepositoryPrivate repo; d = &repo; } ExtractorRepository::~ExtractorRepository() = default; ExtractorRepository::ExtractorRepository(KItinerary::ExtractorRepository &&) = default; std::vector ExtractorRepository::extractorsForMessage(KMime::Content *part) const { std::vector v; if (!part) { return v; } v.push_back(&d->m_genericHtmlExtractor); for (auto it = d->m_extractors.begin(), end = d->m_extractors.end(); it != end; ++it) { if ((*it).type() == Extractor::PkPass) { continue; } for (const auto &filter : (*it).filters()) { auto header = part->headerByType(filter.headerName()); auto ancestor = part; while (!header && ancestor->parent()) { ancestor = ancestor->parent(); header = ancestor->headerByType(filter.headerName()); } if (!header) { continue; } const auto headerData = header->asUnicodeString(); if (filter.matches(headerData)) { v.push_back(&(*it)); break; } } } // ### we probably want to check for the part mimetype here (but note the test data doesn't have that set!) v.push_back(&d->m_genericPdfExtractor); return v; } std::vector ExtractorRepository::extractorsForPass(KPkPass::Pass *pass) const { std::vector v; if (pass->type() != KPkPass::Pass::BoardingPass && pass->type() != KPkPass::Pass::EventTicket) { return v; } for (auto it = d->m_extractors.begin(), end = d->m_extractors.end(); it != end; ++it) { if ((*it).type() != Extractor::PkPass) { continue; } for (const auto &filter : (*it).filters()) { QString value; if (strcmp(filter.headerName(), "passTypeIdentifier") == 0) { value = pass->passTypeIdentifier(); } else { continue; } if (filter.matches(value)) { v.push_back(&(*it)); break; } } } if (v.empty()) { v.push_back(&d->m_genericPkPassExtractor); } return v; } +static QString providerId(const QJsonObject &res) +{ + if (res.value(QLatin1String("@type")).toString() == QLatin1String("FlightReservation")) { + return res.value(QLatin1String("reservationFor")).toObject().value(QLatin1String("airline")).toObject().value(QLatin1String("iataCode")).toString(); + } + if (res.value(QLatin1String("@type")).toString() == QLatin1String("TrainReservation")) { + return res.value(QLatin1String("reservationFor")).toObject().value(QLatin1String("provider")).toObject().value(QLatin1String("identifier")).toString(); + } + + return {}; +} + +std::vector ExtractorRepository::extractorsForJsonLd(const QJsonArray &data) const +{ + std::vector v; + + for (const auto &val : data) { + const auto id = providerId(val.toObject()); + for (auto it = d->m_extractors.begin(), end = d->m_extractors.end(); it != end; ++it) { + for (const auto &filter : (*it).filters()) { + QString value; + if (strcmp(filter.headerName(), "provider") != 0) { + continue; + } + if (filter.matches(id)) { + v.push_back(&(*it)); + break; + } + } + } + } + + return v; +} + void ExtractorRepositoryPrivate::loadExtractors() { auto searchDirs = QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation); searchDirs += QStringLiteral(":/org.kde.pim"); for (const auto &dir : qAsConst(searchDirs)) { QDirIterator it(dir + QStringLiteral("/kitinerary/extractors"), {QStringLiteral("*.json")}, QDir::Files); while (it.hasNext()) { const auto fileName = it.next(); QFile file(fileName); if (!file.open(QFile::ReadOnly)) { continue; } QJsonParseError error; const auto doc = QJsonDocument::fromJson(file.readAll(), &error); if (doc.isNull()) { qCWarning(Log) << "Extractor loading error:" << fileName << error.errorString(); continue; } QFileInfo fi(fileName); if (doc.isObject()) { const auto obj = doc.object(); Extractor e; if (e.load(obj, fi.absolutePath())) { m_extractors.push_back(std::move(e)); } } else if (doc.isArray()) { for (const auto &v : doc.array()) { Extractor e; if (e.load(v.toObject(), fi.absolutePath())) { m_extractors.push_back(std::move(e)); } } } else { qCWarning(Log) << "Invalid extractor meta-data:" << fileName; continue; } } } + // TODO: remove once all users are ported away from this QJsonObject dummy; dummy.insert(QLatin1String("type"), QLatin1String("html")); m_genericHtmlExtractor.load(dummy, {}); dummy.insert(QLatin1String("type"), QLatin1String("pdf")); m_genericPdfExtractor.load(dummy, QString()); dummy.insert(QLatin1String("type"), QLatin1String("pkpass")); m_genericPkPassExtractor.load(dummy, QString()); } diff --git a/src/extractorrepository.h b/src/extractorrepository.h index 4df7f31..b264088 100644 --- a/src/extractorrepository.h +++ b/src/extractorrepository.h @@ -1,64 +1,68 @@ /* Copyright (c) 2017 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef EXTRACTORREPOSITORY_H #define EXTRACTORREPOSITORY_H #include "kitinerary_export.h" #include #include namespace KMime { class Content; } namespace KPkPass { class Pass; } +class QJsonArray; + namespace KItinerary { class Extractor; class ExtractorRepositoryPrivate; /** Collection of all unstructured data extractor rule sets. * * See KItinerary::Extractor on where this loads its content from. */ class KITINERARY_EXPORT ExtractorRepository { public: ExtractorRepository(); ~ExtractorRepository(); ExtractorRepository(ExtractorRepository &&); ExtractorRepository(const ExtractorRepository &) = delete; /** Finds matching extractors for the given message part. */ std::vector extractorsForMessage(KMime::Content *part) const; /** Finds matching extractors for the given pkpass boarding pass. */ std::vector extractorsForPass(KPkPass::Pass *pass) const; + /** Finds matching extractors for the given JSON-LD data provided by generic extractors. */ + std::vector extractorsForJsonLd(const QJsonArray &data) const; private: ExtractorRepositoryPrivate* d; }; } #endif // EXTRACTORREPOSITORY_H diff --git a/src/extractors/airbaltic.json b/src/extractors/airbaltic.json index 77e8f06..bfdf6ba 100644 --- a/src/extractors/airbaltic.json +++ b/src/extractors/airbaltic.json @@ -1,5 +1,8 @@ { "type": "pdf", - "filter": [ { "header": "From", "match": "@airbaltic.com" } ], + "filter": [ + { "header": "From", "match": "@airbaltic.com" }, + { "header": "provider", "match": "BT" } + ], "script": "airbaltic.js" } diff --git a/src/extractors/americanairlines.json b/src/extractors/americanairlines.json index 0da7245..0f4e126 100644 --- a/src/extractors/americanairlines.json +++ b/src/extractors/americanairlines.json @@ -1,5 +1,8 @@ { "type": "pdf", - "filter": [ { "header": "From", "match": "americanairlines@aa.com" } ], + "filter": [ + { "header": "From", "match": "americanairlines@aa.com" }, + { "header": "provider", "match": "AA" } + ], "script": "americanairlines.js" } diff --git a/src/extractors/sas.json b/src/extractors/sas.json index e702a1d..54ce0d6 100644 --- a/src/extractors/sas.json +++ b/src/extractors/sas.json @@ -1,12 +1,15 @@ [ { "type": "pdf", - "filter": [ { "header": "From", "match": "no-reply@flysas.com" } ], + "filter": [ + { "header": "From", "match": "no-reply@flysas.com" }, + { "header": "provider", "match": "SK" } + ], "script": "sas-boardingpass.js" }, { "type": "pdf", "filter": [ { "header": "From", "match": "no-reply@flysas.com" } ], "script": "sas-receipt.js" } ]