diff --git a/src/extractorrepository.cpp b/src/extractorrepository.cpp index 2cfbc98..d44d1a1 100644 --- a/src/extractorrepository.cpp +++ b/src/extractorrepository.cpp @@ -1,184 +1,182 @@ /* Copyright (c) 2017 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "extractorrepository.h" #include "extractor.h" #include "extractorfilter.h" #include "logging.h" #include #include #include #include #include #include #include using namespace KItinerary; static void initResources() // must be outside of a namespace { Q_INIT_RESOURCE(extractors); } namespace KItinerary { class ExtractorRepositoryPrivate { public: void loadExtractors(); std::vector m_extractors; Extractor m_genericHtmlExtractor; Extractor m_genericPdfExtractor; Extractor m_genericPkPassExtractor; }; } ExtractorRepository::ExtractorRepository() : d(new ExtractorRepositoryPrivate) { initResources(); d->loadExtractors(); } ExtractorRepository::~ExtractorRepository() = default; ExtractorRepository::ExtractorRepository(KItinerary::ExtractorRepository &&) = default; std::vector ExtractorRepository::extractorsForMessage(KMime::Content *part) const { std::vector v; if (!part) { return v; } v.push_back(&d->m_genericHtmlExtractor); for (auto it = d->m_extractors.begin(), end = d->m_extractors.end(); it != end; ++it) { if ((*it).type() == Extractor::PkPass) { continue; } for (const auto &filter : (*it).filters()) { auto header = part->headerByType(filter.headerName()); auto ancestor = part; while (!header && ancestor->parent()) { ancestor = ancestor->parent(); header = ancestor->headerByType(filter.headerName()); } if (!header) { continue; } const auto headerData = header->asUnicodeString(); if (filter.matches(headerData)) { v.push_back(&(*it)); break; } } } // ### we probably want to check for the part mimetype here (but note the test data doesn't have that set!) - if (v.size() == 1) { - v.push_back(&d->m_genericPdfExtractor); - } + v.push_back(&d->m_genericPdfExtractor); return v; } std::vector ExtractorRepository::extractorsForPass(KPkPass::Pass *pass) const { std::vector v; if (pass->type() != KPkPass::Pass::BoardingPass && pass->type() != KPkPass::Pass::EventTicket) { return v; } for (auto it = d->m_extractors.begin(), end = d->m_extractors.end(); it != end; ++it) { if ((*it).type() != Extractor::PkPass) { continue; } for (const auto &filter : (*it).filters()) { QString value; if (strcmp(filter.headerName(), "passTypeIdentifier") == 0) { value = pass->passTypeIdentifier(); } else { continue; } if (filter.matches(value)) { v.push_back(&(*it)); break; } } } if (v.empty()) { v.push_back(&d->m_genericPkPassExtractor); } return v; } void ExtractorRepositoryPrivate::loadExtractors() { auto searchDirs = QStandardPaths::standardLocations(QStandardPaths::GenericDataLocation); searchDirs += QStringLiteral(":/org.kde.pim"); for (const auto &dir : qAsConst(searchDirs)) { QDirIterator it(dir + QStringLiteral("/kitinerary/extractors"), {QStringLiteral("*.json")}, QDir::Files); while (it.hasNext()) { const auto fileName = it.next(); QFile file(fileName); if (!file.open(QFile::ReadOnly)) { continue; } QJsonParseError error; const auto doc = QJsonDocument::fromJson(file.readAll(), &error); if (doc.isNull()) { qCWarning(Log) << "Extractor loading error:" << fileName << error.errorString(); continue; } QFileInfo fi(fileName); if (doc.isObject()) { const auto obj = doc.object(); Extractor e; if (e.load(obj, fi.absolutePath())) { m_extractors.push_back(std::move(e)); } } else if (doc.isArray()) { for (const auto &v : doc.array()) { Extractor e; if (e.load(v.toObject(), fi.absolutePath())) { m_extractors.push_back(std::move(e)); } } } else { qCWarning(Log) << "Invalid extractor meta-data:" << fileName; continue; } } } QJsonObject dummy; dummy.insert(QLatin1String("type"), QLatin1String("html")); m_genericHtmlExtractor.load(dummy, {}); dummy.insert(QLatin1String("type"), QLatin1String("pdf")); m_genericPdfExtractor.load(dummy, QString()); dummy.insert(QLatin1String("type"), QLatin1String("pkpass")); m_genericPkPassExtractor.load(dummy, QString()); } diff --git a/src/extractors/airfrance.json b/src/extractors/airfrance.json index 07acbc4..21b45a9 100644 --- a/src/extractors/airfrance.json +++ b/src/extractors/airfrance.json @@ -1,17 +1,8 @@ [{ "type": "pdf", "filter": [ { "header": "From", "match": "airfrance.fr" }, { "header": "From", "match": "airfrance.com" } ], "script": "airfrance.js" -}, -{ - "type": "pdf", - "filter": [ - { "header": "From", "match": "airfrance.fr" }, - { "header": "From", "match": "airfrance.com" } - ], - "script": "generic.js", - "function": "parsePdfBoardingPass" }] diff --git a/src/extractors/brusselsairlines.json b/src/extractors/brusselsairlines.json index 3ce6342..87ed69f 100644 --- a/src/extractors/brusselsairlines.json +++ b/src/extractors/brusselsairlines.json @@ -1,20 +1,14 @@ [ { "type": "html", "filter": [ { "header": "From", "match": "@brusselsairlines.com" } ], "script": "brusselsairlines.js" }, { "type": "pdf", "filter": [ { "header": "From", "match": "brusselsairlines.com" } ], "script": "brusselsairlines-receipt.js" - }, - { - "type": "pdf", - "filter": [{ "header": "From", "match": "brusselsairlines.com" }], - "script": "generic.js", - "function": "parsePdfBoardingPass" } ] diff --git a/src/extractors/eurowings.json b/src/extractors/eurowings.json index 953e605..0469c5a 100644 --- a/src/extractors/eurowings.json +++ b/src/extractors/eurowings.json @@ -1,16 +1,11 @@ [{ "type": "text", "filter": [ { "header": "From", "match": "@booking.eurowings.com" } ], "script": "eurowings.js" }, { "type": "pkpass", "filter": [ { "header": "passTypeIdentifier", "match": "pass.wings.boardingpass" } ], "script": "eurowings-pkpass.js" -}, { - "type": "pdf", - "filter": [{ "header": "From", "match": "eurowings.com" }], - "script": "generic.js", - "function": "parsePdfBoardingPass" }] diff --git a/src/extractors/extractors.qrc b/src/extractors/extractors.qrc index dc00315..524a623 100644 --- a/src/extractors/extractors.qrc +++ b/src/extractors/extractors.qrc @@ -1,54 +1,52 @@ aerlingus.json aerlingus.js airbaltic.json airbaltic.js airfrance.json airfrance.js amadeus.json amadeus.js americanairlines.json americanairlines.js aohostels.json aohostels.js booking.json booking.js brusselsairlines.json brusselsairlines.js brusselsairlines-receipt.js czechrailways.json czechrailways.js deutschebahn.json deutschebahn.js eurowings.json eurowings.js eurowings-pkpass.js fcmtravel.json fcmtravel.js - generic.json - generic.js hertz.js hertz.json iberia.json iberia.js klm.json klm.js korail.json korail.js lufthansa.json lufthansa-pkpass.js regiojet.json regiojet.js sas.json sas-boardingpass.js sas-receipt.js sncf.json sncf.js swiss.json swiss.js swiss-pkpass.js vueling.json vueling.js diff --git a/src/extractors/generic.js b/src/extractors/generic.js deleted file mode 100644 index 2273f78..0000000 --- a/src/extractors/generic.js +++ /dev/null @@ -1,46 +0,0 @@ -/* - Copyright (c) 2018 Volker Krause - - This library is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public License as published by - the Free Software Foundation; either version 2 of the License, or (at your - option) any later version. - - This library is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public - License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to the - Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. -*/ - -function parsePdfBoardingPass(pdf) { - var result = new Array(); - - for (var i = 0; i < pdf.pageCount; ++i) { - var page = pdf.pages[i]; - var images = page.images; - for (var j = 0; j < images.length; ++j) { - var image = images[j]; - if (image.width < 30 || images.height < 10) - continue; - var aspectRatio = image.width / image.height; - if (aspectRatio < 1) - aspectRatio = 1/aspectRatio; - - var bcbp; - if (aspectRatio < 1.2) - bcbp = Barcode.decodeAztec(image); - else if (aspectRatio > 1.5 && aspectRatio < 6) - bcbp = Barcode.decodePdf417(image); - if (!bcbp) - continue; - result = result.concat(JsonLd.toJson(Barcode.decodeIataBcbp(bcbp))); - } - } - - return result; -} diff --git a/src/extractors/generic.json b/src/extractors/generic.json deleted file mode 100644 index 395cc5a..0000000 --- a/src/extractors/generic.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "pdf", - "filter": [ - { "header": "From", "match": "airberlin.com" }, - { "header": "From", "match": "britishairways.com" }, - { "header": "From", "match": "finnair.com" }, - { "header": "From", "match": "tap.pt" }, - { "header": "From", "match": "united.com" } - ], - "script": "generic.js", - "function": "parsePdfBoardingPass" -} diff --git a/src/extractors/klm.json b/src/extractors/klm.json index 1e6c3f9..af90935 100644 --- a/src/extractors/klm.json +++ b/src/extractors/klm.json @@ -1,12 +1,7 @@ [{ "type": "text", "filter": [ { "header": "From", "match": "noreply@klm.com" } ], "script": "klm.js" -}, { - "type": "pdf", - "filter": [{ "header": "From", "match": "klm.com" }], - "script": "generic.js", - "function": "parsePdfBoardingPass" }] diff --git a/src/extractors/lufthansa.json b/src/extractors/lufthansa.json index 4e2df9d..13e1bad 100644 --- a/src/extractors/lufthansa.json +++ b/src/extractors/lufthansa.json @@ -1,10 +1,5 @@ [{ "type": "pkpass", "filter": [ { "header": "passTypeIdentifier", "match": "pass.com.lufthansa.mbp" } ], "script": "lufthansa-pkpass.js" -}, { - "type": "pdf", - "filter": [{ "header": "From", "match": "lufthansa.com" }], - "script": "generic.js", - "function": "parsePdfBoardingPass" }] diff --git a/src/extractors/swiss.json b/src/extractors/swiss.json index 3b07d9b..c4eaca0 100644 --- a/src/extractors/swiss.json +++ b/src/extractors/swiss.json @@ -1,18 +1,12 @@ [ { "type": "text", "filter": [ { "header": "From", "match": "noreply@swiss.com" } ], "script": "swiss.js" }, { "type": "pkpass", "filter": [ { "header": "passTypeIdentifier", "match": "pass.booking.swiss.com" } ], "script": "swiss-pkpass.js" - }, - { - "type": "pdf", - "filter": [{ "header": "From", "match": "swiss.com" }], - "script": "generic.js", - "function": "parsePdfBoardingPass" } ] diff --git a/src/extractors/vueling.json b/src/extractors/vueling.json index 077da4f..afd92d3 100644 --- a/src/extractors/vueling.json +++ b/src/extractors/vueling.json @@ -1,14 +1,8 @@ [ { "type": "html", "filter": [ { "header": "From", "match": "@vueling.com" } ], "script": "vueling.js", "function": "parseHtmlBooking" - }, - { - "type": "pdf", - "filter": [{ "header": "From", "match": "@vueling.com" }], - "script": "generic.js", - "function": "parsePdfBoardingPass" } ]