diff --git a/src/extractorengine.cpp b/src/extractorengine.cpp index 26c29fb..d0d05be 100644 --- a/src/extractorengine.cpp +++ b/src/extractorengine.cpp @@ -1,617 +1,617 @@ /* Copyright (c) 2017 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config-kitinerary.h" #include "extractorengine.h" #include "extractor.h" #include "extractorrepository.h" #include "genericpdfextractor.h" #include "htmldocument.h" #include "jsonlddocument.h" #include "logging.h" #include "pdfdocument.h" #include "structureddataextractor.h" #include "jsapi/barcode.h" #include "jsapi/context.h" #include "jsapi/jsonld.h" #ifdef HAVE_KCAL #include #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include using namespace KItinerary; namespace KItinerary { class ExtractorEnginePrivate { public: void setupEngine(); void resetContent(); void setContent(KMime::Content *content); void setContext(KMime::Content *context); void setContextDate(const QDateTime &dt); void extractRecursive(KMime::Content *content); void extractDocument(); void extractStructured(); void extractCustom(); void extractGeneric(); void executeScript(const Extractor *extractor); void processScriptResult(const QJSValue &result); void extractPass(); void 
extractBoardingPass(QJsonObject &resFor); void extractEventTicketPass(QJsonObject &resFor); std::vector m_extractors; JsApi::Barcode *m_barcodeApi = nullptr; JsApi::Context *m_context = nullptr; JsApi::JsonLd *m_jsonLdApi = nullptr; QString m_text; std::unique_ptr> m_htmlDoc; std::unique_ptr> m_pdfDoc; std::unique_ptr> m_pass; #ifdef HAVE_KCAL KCalCore::Calendar::Ptr m_calendar; #endif KMime::Content *m_mimeContent = nullptr; KMime::Content *m_mimeContext = nullptr; GenericPdfExtractor m_genericPdfExtractor; QJsonArray m_result; QJSEngine m_engine; ExtractorRepository m_repo; }; template static std::unique_ptr> make_owning_ptr(T *ptr) { return std::unique_ptr>(ptr, [](T *ptr){ delete ptr; }); } template static std::unique_ptr> make_nonowning_ptr(T *ptr) { return std::unique_ptr>(ptr, [](T*){}); } } void ExtractorEnginePrivate::setupEngine() { m_context = new JsApi::Context; // will be deleted by QJSEngine taking ownership m_engine.installExtensions(QJSEngine::ConsoleExtension); m_jsonLdApi = new JsApi::JsonLd(&m_engine); m_engine.globalObject().setProperty(QStringLiteral("JsonLd"), m_engine.newQObject(m_jsonLdApi)); m_barcodeApi = new JsApi::Barcode; m_engine.globalObject().setProperty(QStringLiteral("Barcode"), m_engine.newQObject(m_barcodeApi)); m_engine.globalObject().setProperty(QStringLiteral("Context"), m_engine.newQObject(m_context)); } ExtractorEngine::ExtractorEngine() : d(new ExtractorEnginePrivate) { d->setupEngine(); } ExtractorEngine::ExtractorEngine(ExtractorEngine &&) noexcept = default; ExtractorEngine::~ExtractorEngine() = default; void ExtractorEngine::clear() { d->resetContent(); d->m_result = {}; d->m_mimeContext = nullptr; d->m_context->m_senderDate = {}; } void ExtractorEnginePrivate::resetContent() { m_text.clear(); m_pdfDoc.reset(); m_htmlDoc.reset(); m_pass.reset(); #ifdef HAVE_KCAL m_calendar.reset(); #endif m_mimeContent = nullptr; } void ExtractorEngine::setExtractors(std::vector &&extractors) { d->m_extractors = extractors; } void 
ExtractorEngine::setText(const QString &text) { d->m_text = text; } void ExtractorEngine::setHtmlDocument(HtmlDocument *htmlDoc) { d->m_htmlDoc = make_nonowning_ptr(htmlDoc); } void ExtractorEngine::setPdfDocument(PdfDocument *pdfDoc) { d->m_pdfDoc = make_nonowning_ptr(pdfDoc); } void ExtractorEngine::setPass(KPkPass::Pass *pass) { d->m_pass = make_nonowning_ptr(pass); } void ExtractorEngine::setCalendar(const QSharedPointer &calendar) { #ifdef HAVE_KCAL d->m_calendar = calendar; #else Q_UNUSED(calendar); #endif } static bool isContentType(KMime::Content *content, KMime::Headers::ContentType *ct, const char *mimeType, const char *ext) { if (ct && ct->mimeType() == mimeType) { return true; } if (ct && ct->name().endsWith(QLatin1String(ext))) { return true; } const auto cd = content->contentDisposition(false); return cd && cd->filename().endsWith(QLatin1String(ext)); } void ExtractorEnginePrivate::setContent(KMime::Content *content) { setContext(content); const auto ct = content->contentType(false); if (isContentType(content, ct, "application/vnd.apple.pkpass", ".pkpass")) { m_pass = make_owning_ptr(KPkPass::Pass::fromData(content->decodedContent())); } else if (isContentType(content, ct, "text/calendar", ".ics")) { #ifdef HAVE_KCAL m_calendar.reset(new KCalCore::MemoryCalendar(QTimeZone())); KCalCore::ICalFormat format; if (!format.fromRawString(m_calendar, content->decodedContent())) { m_calendar.reset(); } #endif } else if (isContentType(content, ct, "application/pdf", ".pdf")) { m_pdfDoc = make_owning_ptr(PdfDocument::fromData(content->decodedContent())); } else if (ct && ct->isHTMLText()) { m_htmlDoc = make_owning_ptr(HtmlDocument::fromData(content->decodedContent())); } else if ( (ct && ct->isPlainText()) || (!ct && content->isTopLevel())) { m_text = content->decodedText(); } m_mimeContent = (ct && ct->isMultipart()) ? 
content : nullptr; } void ExtractorEnginePrivate::setContext(KMime::Content *context) { m_mimeContext = context; auto dateHdr = context->header(); while (!dateHdr && context->parent()) { context = context->parent(); dateHdr = context->header(); } if (dateHdr) { setContextDate(dateHdr->dateTime()); } } void ExtractorEnginePrivate::setContextDate(const QDateTime &dt) { m_context->m_senderDate = dt; m_jsonLdApi->setContextDate(dt); - m_barcodeApi->setContextDate(dt.date()); + m_barcodeApi->setContextDate(dt); m_genericPdfExtractor.setContextDate(dt); } void ExtractorEngine::setContent(KMime::Content *content) { d->setContent(content); } void ExtractorEngine::setContext(KMime::Content *context) { d->setContext(context); } void ExtractorEngine::setContextDate(const QDateTime &dt) { d->setContextDate(dt); } QJsonArray ExtractorEngine::extract() { if (d->m_mimeContent) { d->extractRecursive(d->m_mimeContent); } else { d->extractDocument(); } return d->m_result; } void ExtractorEnginePrivate::extractRecursive(KMime::Content *content) { for (const auto child : content->contents()) { resetContent(); setContent(child); if (m_mimeContent) { extractRecursive(m_mimeContent); } else { extractDocument(); } } } void ExtractorEnginePrivate::extractDocument() { // structured content extractStructured(); if (!m_result.isEmpty()) { return; } // custom extractors if (m_pass) { m_extractors = m_repo.extractorsForPass(m_pass.get()); } else if (m_mimeContext) { m_extractors = m_repo.extractorsForMessage(m_mimeContext); } extractCustom(); // generic extractors extractGeneric(); // check if generic extractors identified documents we have custom extractors for m_extractors = m_repo.extractorsForJsonLd(m_result); extractCustom(); // check the unrecognized (vendor-specific) barcodes, if any if (m_pdfDoc) { for (const auto &code : m_genericPdfExtractor.unrecognizedBarcodes()) { m_extractors = m_repo.extractorsForBarcode(code); qDebug() << code << m_extractors.size(); extractCustom(); } } } void 
ExtractorEnginePrivate::extractStructured() { if (m_htmlDoc) { qCDebug(Log) << "Looking for structured annotations..."; for (const auto &v : StructuredDataExtractor::extract(m_htmlDoc.get())) { m_result.push_back(v); } } } void ExtractorEnginePrivate::extractCustom() { for (const auto extractor : m_extractors) { switch (extractor->type()) { case Extractor::Text: // running text extractors on PDF or HTML docs is possible, // but only extract the text when really needed if (m_text.isEmpty() && m_pdfDoc) { m_text = m_pdfDoc->text(); } if (m_text.isEmpty() && m_htmlDoc) { m_text = m_htmlDoc->root().recursiveContent(); } if (!m_text.isEmpty()) { executeScript(extractor); } break; case Extractor::Html: if (m_htmlDoc) { executeScript(extractor); } break; case Extractor::Pdf: if (m_pdfDoc) { executeScript(extractor); } break; case Extractor::PkPass: if (m_pass) { executeScript(extractor); } break; case Extractor::ICal: #ifdef HAVE_KCAL if (m_calendar) { executeScript(extractor); } #endif break; } if (!m_result.isEmpty()) { break; } } } void ExtractorEnginePrivate::extractGeneric() { if (m_pass) { extractPass(); } else if (m_pdfDoc && m_result.isEmpty()) { m_genericPdfExtractor.extract(m_pdfDoc.get(), m_result); } } void ExtractorEnginePrivate::executeScript(const Extractor *extractor) { Q_ASSERT(extractor); if (extractor->scriptFileName().isEmpty()) { return; } QFile f(extractor->scriptFileName()); if (!f.open(QFile::ReadOnly)) { qCWarning(Log) << "Failed to open extractor script" << f.fileName() << f.errorString(); return; } auto result = m_engine.evaluate(QString::fromUtf8(f.readAll()), f.fileName()); if (result.isError()) { qCWarning(Log) << "Script parsing error in" << result.property(QLatin1String("fileName")).toString() << ':' << result.property(QLatin1String("lineNumber")).toInt() << result.toString(); return; } auto mainFunc = m_engine.globalObject().property(extractor->scriptFunction()); if (!mainFunc.isCallable()) { qCWarning(Log) << "Script entry point not 
found!" << extractor->scriptFunction(); return; } qCDebug(Log) << "Running custom extractor" << extractor->scriptFileName() << extractor->scriptFunction(); QJSValueList args; switch (extractor->type()) { case Extractor::Text: args = {m_text}; break; case Extractor::Html: args = {m_engine.toScriptValue(m_htmlDoc.get())}; break; case Extractor::Pdf: args = {m_engine.toScriptValue(m_pdfDoc.get())}; break; case Extractor::PkPass: args = {m_engine.toScriptValue(m_pass.get())}; break; case Extractor::ICal: #ifdef HAVE_KCAL for (const auto &event : m_calendar->events()) { processScriptResult(mainFunc.call({m_engine.toScriptValue(*event.data())})); } #endif break; } if (!args.isEmpty()) { processScriptResult(mainFunc.call(args)); } } void ExtractorEnginePrivate::processScriptResult(const QJSValue &result) { if (result.isError()) { qCWarning(Log) << "Script execution error in" << result.property(QLatin1String("fileName")).toString() << ':' << result.property(QLatin1String("lineNumber")).toInt() << result.toString(); return; } if (result.isArray()) { QJSValueIterator it(result); while (it.hasNext()) { it.next(); if (it.value().isObject()) { m_result.push_back(QJsonValue::fromVariant(it.value().toVariant())); } } } else if (result.isObject()) { m_result.push_back(QJsonValue::fromVariant(result.toVariant())); } else { qCWarning(Log) << "Invalid result type from script"; } } void ExtractorEnginePrivate::extractPass() { if (m_result.size() > 1) { // a pkpass file contains exactly one boarding pass return; } if (m_result.isEmpty()) { // no script run, so we need to create the top-level element ourselves QJsonObject res; QJsonObject resFor; if (auto boardingPass = qobject_cast(m_pass.get())) { switch (boardingPass->transitType()) { case KPkPass::BoardingPass::Air: res.insert(QLatin1String("@type"), QLatin1String("FlightReservation")); resFor.insert(QLatin1String("@type"), QLatin1String("Flight")); break; // TODO expand once we have test files for train tickets default: return; } } 
else { switch (m_pass->type()) { case KPkPass::Pass::EventTicket: res.insert(QLatin1String("@type"), QLatin1String("EventReservation")); resFor.insert(QLatin1String("@type"), QLatin1String("Event")); break; default: return; } } res.insert(QLatin1String("reservationFor"), resFor); m_result.push_back(res); } // extract structured data from a pkpass, if the extractor script hasn't done so already auto res = m_result.at(0).toObject(); auto resFor = res.value(QLatin1String("reservationFor")).toObject(); switch (m_pass->type()) { case KPkPass::Pass::BoardingPass: extractBoardingPass(resFor); break; case KPkPass::Pass::EventTicket: extractEventTicketPass(resFor); break; default: return; } // barcode contains the ticket token if (!m_pass->barcodes().isEmpty() && !res.contains(QLatin1String("reservedTicket"))) { const auto barcode = m_pass->barcodes().at(0); QString token; switch (barcode.format()) { case KPkPass::Barcode::QR: token += QLatin1String("qrCode:"); break; case KPkPass::Barcode::Aztec: token += QLatin1String("aztecCode:"); break; default: break; } token += barcode.message(); QJsonObject ticket; ticket.insert(QLatin1String("@type"), QLatin1String("Ticket")); ticket.insert(QLatin1String("ticketToken"), token); res.insert(QLatin1String("reservedTicket"), ticket); } res.insert(QLatin1String("reservationFor"), resFor); // associate the pass with the result, so we can find the pass again for display if (!m_pass->passTypeIdentifier().isEmpty() && !m_pass->serialNumber().isEmpty()) { res.insert(QLatin1String("pkpassPassTypeIdentifier"), m_pass->passTypeIdentifier()); res.insert(QLatin1String("pkpassSerialNumber"), m_pass->serialNumber()); } m_result[0] = res; } void ExtractorEnginePrivate::extractBoardingPass(QJsonObject &resFor) { // "relevantDate" is the best guess for the boarding time if (m_pass->relevantDate().isValid() && !resFor.contains(QLatin1String("boardingTime"))) { resFor.insert(QLatin1String("boardingTime"), m_pass->relevantDate().toString(Qt::ISODate)); } 
// look for common field names containing the boarding time, if we still have no idea if (!resFor.contains(QLatin1String("boardingTime"))) { for (const auto &field : m_pass->fields()) { if (!field.key().contains(QLatin1String("boarding"), Qt::CaseInsensitive)) { continue; } const auto time = QTime::fromString(field.value().toString()); if (time.isValid()) { // this misses date, but the postprocessor will fill that in resFor.insert(QLatin1String("boardingTime"), QDateTime(QDate(1, 1, 1), time).toString(Qt::ISODate)); break; } } } // location is the best guess for the departure airport geo coordinates auto depAirport = resFor.value(QLatin1String("departureAirport")).toObject(); if (depAirport.isEmpty()) { depAirport.insert(QLatin1String("@type"), QLatin1String("Airport")); } auto depGeo = depAirport.value(QLatin1String("geo")).toObject(); if (m_pass->locations().size() == 1 && depGeo.isEmpty()) { const auto loc = m_pass->locations().at(0); depGeo.insert(QLatin1String("@type"), QLatin1String("GeoCoordinates")); depGeo.insert(QLatin1String("latitude"), loc.latitude()); depGeo.insert(QLatin1String("longitude"), loc.longitude()); depAirport.insert(QLatin1String("geo"), depGeo); resFor.insert(QLatin1String("departureAirport"), depAirport); } // organizationName is the best guess for airline name auto airline = resFor.value(QLatin1String("airline")).toObject(); if (airline.isEmpty()) { airline.insert(QLatin1String("@type"), QLatin1String("Airline")); } if (!airline.contains(QLatin1String("name"))) { airline.insert(QLatin1String("name"), m_pass->organizationName()); } resFor.insert(QLatin1String("airline"), airline); } void ExtractorEnginePrivate::extractEventTicketPass(QJsonObject &resFor) { if (!resFor.contains(QLatin1String("name"))) { resFor.insert(QLatin1String("name"), m_pass->description()); } // "relevantDate" is the best guess for the start time if (m_pass->relevantDate().isValid() && !resFor.contains(QLatin1String("startDate"))) { 
resFor.insert(QLatin1String("startDate"), m_pass->relevantDate().toString(Qt::ISODate)); } // location is the best guess for the venue auto venue = resFor.value(QLatin1String("location")).toObject(); if (venue.isEmpty()) { venue.insert(QLatin1String("@type"), QLatin1String("Place")); } auto geo = venue.value(QLatin1String("geo")).toObject(); if (!m_pass->locations().isEmpty() && geo.isEmpty()) { const auto loc = m_pass->locations().at(0); geo.insert(QLatin1String("@type"), QLatin1String("GeoCoordinates")); geo.insert(QLatin1String("latitude"), loc.latitude()); geo.insert(QLatin1String("longitude"), loc.longitude()); venue.insert(QLatin1String("geo"), geo); venue.insert(QLatin1String("name"), loc.relevantText()); resFor.insert(QLatin1String("location"), venue); } } diff --git a/src/genericpdfextractor.cpp b/src/genericpdfextractor.cpp index bc17731..5af1f25 100644 --- a/src/genericpdfextractor.cpp +++ b/src/genericpdfextractor.cpp @@ -1,160 +1,161 @@ /* Copyright (C) 2018 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "genericpdfextractor.h" #include #include #include #include #include #include #include #include #include #include using namespace KItinerary; enum { MaxPageCount = 10, // maximum in the current test set is 6 MaxFileSize = 4000000, // maximum in the current test set is 980kB // unit is pixels, assuming landscape orientation MinSourceImageHeight = 10, MinSourceImageWidth = 30, MaxSourceImageHeight = 1000, // TODO what's a realistic value here? MaxSourceImageWidth = 2000, // unit is 1/72 inch, assuming landscape orientation MinTargetImageHeight = 30, MinTargetImageWidth = 72, MaxTargetImageHeight = 252, MaxTargetImageWidth = 252, }; GenericPdfExtractor::GenericPdfExtractor() = default; GenericPdfExtractor::~GenericPdfExtractor() = default; void GenericPdfExtractor::setContextDate(const QDateTime &dt) { m_contextDate = dt; } void GenericPdfExtractor::extract(PdfDocument *doc, QJsonArray &result) { m_unrecognizedBarcodes.clear(); // stay away from documents that are atypically large for what we are looking for // that's just unnecessarily eating up resources if (doc->pageCount() > MaxPageCount || doc->fileSize() > MaxFileSize) { return; } m_imageIds.clear(); for (int i = 0; i < doc->pageCount(); ++i) { const auto page = doc->page(i); for (int j = 0; j < page.imageCount(); ++j) { const auto img = page.image(j); if (m_imageIds.find(img.objectId()) != m_imageIds.end()) { continue; } // image source size sanity checks if (std::min(img.sourceWidth(), img.sourceHeight()) < MinSourceImageHeight || std::max(img.sourceWidth(), img.sourceHeight()) < MinSourceImageWidth || img.sourceHeight() > MaxSourceImageHeight || img.sourceWidth() > MaxSourceImageWidth) { continue; } // image target size checks const auto targetRect = img.transform().map(QRectF(0, 0, 1, -1)).boundingRect(); if (std::min(targetRect.width(), targetRect.height()) < MinTargetImageHeight || std::max(targetRect.width(), targetRect.height()) < MinTargetImageWidth || targetRect.height() > 
MaxTargetImageHeight || targetRect.width() > MaxTargetImageWidth) { continue; } extractImage(img, result); m_imageIds.insert(img.objectId()); } } } QStringList GenericPdfExtractor::unrecognizedBarcodes() const { return m_unrecognizedBarcodes; } void GenericPdfExtractor::extractImage(const PdfImage &img, QJsonArray &result) { const auto aspectRatio = img.width() < img.height() ? (float)img.height() / (float)img.width() : (float)img.width() / (float)img.height(); // almost square, assume Aztec (or QR, which we don't handle here yet) if (aspectRatio < 1.2f) { const auto b = BarcodeDecoder::decodeAztecBinary(img.image()); if (Uic9183Parser::maybeUic9183(b)) { extractUic9183(b, result); } else { extractBarcode(QString::fromUtf8(b), result); } } // rectangular with medium aspect ratio, assume PDF 417 if (aspectRatio > 1.5 && aspectRatio < 6) { const auto s = BarcodeDecoder::decodePdf417(img.image()); extractBarcode(s, result); } } void GenericPdfExtractor::extractBarcode(const QString &code, QJsonArray &result) { if (IataBcbpParser::maybeIataBcbp(code)) { const auto res = IataBcbpParser::parse(code, m_contextDate.date()); const auto jsonLd = JsonLdDocument::toJson(res); std::copy(jsonLd.begin(), jsonLd.end(), std::back_inserter(result)); } m_unrecognizedBarcodes.push_back(code); } void GenericPdfExtractor::extractUic9183(const QByteArray &data, QJsonArray &result) { Uic9183Parser p; + p.setContextDate(m_contextDate); p.parse(data); if (!p.isValid()) { return; } // TODO: add RCT2 ticket data QJsonObject org; org.insert(QLatin1String("@type"), QLatin1String("Organization")); org.insert(QLatin1String("identifier"), QString(QLatin1String("uic:") + p.carrierId())); QJsonObject trip; trip.insert(QLatin1String("@type"), QLatin1String("TrainTrip")); trip.insert(QLatin1String("provider"), org); QJsonObject res; res.insert(QLatin1String("@type"), QLatin1String("TrainReservation")); res.insert(QLatin1String("reservationFor"), trip); res.insert(QLatin1String("reservationNumber"), 
p.pnr()); result.push_back(res); } diff --git a/src/jsapi/barcode.cpp b/src/jsapi/barcode.cpp index ec72e76..51fd4b0 100644 --- a/src/jsapi/barcode.cpp +++ b/src/jsapi/barcode.cpp @@ -1,76 +1,77 @@ /* Copyright (C) 2018 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "barcode.h" #include #include #include #include #include using namespace KItinerary; QString JsApi::Barcode::decodePdf417(const QVariant &img) const { if (img.userType() == qMetaTypeId()) { return BarcodeDecoder::decodePdf417(img.value().image()); } return {}; } QString JsApi::Barcode::decodeAztec(const QVariant &img) const { if (img.userType() == qMetaTypeId()) { return BarcodeDecoder::decodeAztec(img.value().image()); } return {}; } QVariant JsApi::Barcode::decodeAztecBinary(const QVariant &img) const { if (img.userType() == qMetaTypeId()) { const auto b = BarcodeDecoder::decodeAztecBinary(img.value().image()); return QVariant::fromValue(b); } return {}; } QVariant JsApi::Barcode::decodeUic9183(const QVariant &s) const { Uic9183Parser p; + p.setContextDate(m_contextDate); p.parse(s.toByteArray()); return QVariant::fromValue(p); } QVariant JsApi::Barcode::decodeIataBcbp(const QString &s) const { - return QVariant::fromValue(IataBcbpParser::parse(s, m_contextDate)); + return QVariant::fromValue(IataBcbpParser::parse(s, m_contextDate.date())); } QString JsApi::Barcode::toBase64(const QVariant &b) const { return 
QString::fromUtf8(b.toByteArray().toBase64()); } -void JsApi::Barcode::setContextDate(const QDate& date) +void JsApi::Barcode::setContextDate(const QDateTime &dt) { - m_contextDate = date; + m_contextDate = dt; } #include "moc_barcode.cpp" diff --git a/src/jsapi/barcode.h b/src/jsapi/barcode.h index 09bcd20..23f3ae3 100644 --- a/src/jsapi/barcode.h +++ b/src/jsapi/barcode.h @@ -1,68 +1,68 @@ /* Copyright (C) 2018 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef KITINERARY_JSAPI_BARCODE_H #define KITINERARY_JSAPI_BARCODE_H -#include +#include #include namespace KItinerary { namespace JsApi { /** Barcode decoding functions. */ class Barcode : public QObject { Q_OBJECT public: /** Decode a PDF417 barcode image. * @param img An image containing the barcode, e.g. a PdfImage instance. */ Q_INVOKABLE QString decodePdf417(const QVariant &img) const; /** Decode an Aztec barcode image. * @param img An image containing the barcode, e.g. a PdfImage instance. */ Q_INVOKABLE QString decodeAztec(const QVariant &img) const; /** Decode an Aztec barcode image containing binary data. * @param img An image containing the barcode, e.g. a PdfImage instance. * @return a QByteArray, which from the JS perspective is essentially an opaque handle. */ Q_INVOKABLE QVariant decodeAztecBinary(const QVariant &img) const; /** Decode an UIC 918.3 message from a train ticket Aztec code. 
* @param s A QByteArray containing the raw data from the barcode. * @returns An instance of Uic9183Parser. */ Q_INVOKABLE QVariant decodeUic9183(const QVariant &s) const; /** Decode an IATA BCBP message from a flight boarding pass barcode. * @returns A JSON-LD structure representing the boarding pass. */ Q_INVOKABLE QVariant decodeIataBcbp(const QString &s) const; /** Converts the given QByteArray into a base64 encoded string. */ Q_INVOKABLE QString toBase64(const QVariant &b) const; ///@cond internal - void setContextDate(const QDate &date); + void setContextDate(const QDateTime &dt); ///@endcond private: - QDate m_contextDate; + QDateTime m_contextDate; }; } } #endif // KITINERARY_JSAPI_BARCODE_H diff --git a/src/uic9183parser.cpp b/src/uic9183parser.cpp index f65595a..c434837 100644 --- a/src/uic9183parser.cpp +++ b/src/uic9183parser.cpp @@ -1,662 +1,676 @@ /* Copyright (C) 2018 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "uic9183parser.h" #include "logging.h" #include #include #include #include #include using namespace KItinerary; static int asciiToInt(const char *s, int size) { if (!s) { return 0; } int v = 0; for (int i = 0; i < size; ++i) { v *= 10; v += (*(s + i)) - '0'; } return v; } namespace KItinerary { class Uic9183Block { public: Uic9183Block() = default; Uic9183Block(const char *data, int size); const char *data() const { return m_data; } int version() const; int size() const { return m_size; } bool isNull() const { return m_size <= 12; } private: const char *m_data = nullptr; int m_size = 0; }; // 0080BL vendor block sub-block ("S block") // 1x 'S' // 3x field type // 4x field value length // nx field value class Vendor0080BLSubBlock { public: Vendor0080BLSubBlock() = default; Vendor0080BLSubBlock(const char *data, int size); bool isNull() const { return m_size <= 0 || !m_data; } int size() const { return m_size; } const char *id() const { return m_data + 1; } const char *data() const { return m_data + 8; } QString toString() const { return QString::fromUtf8(data(), size()); } private: const char *m_data = nullptr; int m_size = 0; }; // 0080BL vendor block (DB) (version 2/3, dynamic size) // 2x stuff // 1x number of certificate blocks // 22+8+8+8x (v2) or 8+8+10x (v3) certificate block // 2x number of sub blocks class Vendor0080BLBlock { public: Vendor0080BLBlock(const Uic9183Block &block); bool isValid() const; Vendor0080BLSubBlock findSubBlock(const char id[3]) const; private: static int subblockOffset(const Uic9183Block &block); Uic9183Block m_block; }; class Uic9183ParserPrivate : public QSharedData { public: // name is either "U_" + 4 letter type or a 4 digit vendor id + 2 char type Uic9183Block findBlock(const char name[6]) const; QByteArray m_payload; + QDateTime m_contextDt; }; // 2x field line, number as ascii text // 2x field column // 2x field height // 2x field width // 1x field format // 4x text length // Nx text content class Rct2TicketField { 
public: Rct2TicketField() = default; /** Create a new RCT2 field starting at @p data. * @param size The size of the remaining RCT2 field array (not just this field!). */ Rct2TicketField(const char *data, int size); bool isNull() const; // size of the field data, not size of the text content int size() const; int row() const; int column() const; int height() const; int width() const; QString text() const; Rct2TicketField next() const; private: const char *m_data = nullptr; int m_size = 0; }; class Rct2TicketPrivate : public QSharedData { public: QString fieldText(int row, int column, int width, int height = 1) const; QDate firstDayOfValidity() const; QDateTime parseTime(const QString &dateStr, const QString &timeStr) const; Rct2TicketField firstField() const; Uic9183Block block; + QDateTime contextDt; }; } Uic9183Block::Uic9183Block(const char* data, int size) : m_data(data) , m_size(size) { } int Uic9183Block::version() const { return QByteArray(m_data + 6, 2).toInt(); } Vendor0080BLSubBlock::Vendor0080BLSubBlock(const char *data, int size) : m_data(data) , m_size(size) { } Vendor0080BLBlock::Vendor0080BLBlock(const Uic9183Block &block) { if (block.isNull()) { return; } if (block.version() != 2 && block.version() != 3) { qCWarning(Log) << "Unsupported version of 0080BL vendor block." 
<< block.version(); return; } if (block.isNull() || block.size() < 15 || subblockOffset(block) > block.size()) { return; } m_block = block; } bool Vendor0080BLBlock::isValid() const { return !m_block.isNull(); } Vendor0080BLSubBlock Vendor0080BLBlock::findSubBlock(const char id[3]) const { for (int i = subblockOffset(m_block); i < m_block.size();) { if (*(m_block.data() + i) != 'S') { qCWarning(Log) << "0080BL invalid S-block format."; return {}; } const int subblockSize = QByteArray(m_block.data() + i + 4, 4).toInt(); if (subblockSize + i > m_block.size()) { qCWarning(Log) << "0080BL S-block size exceeds block size."; return {}; } Vendor0080BLSubBlock sb(m_block.data() + i, subblockSize); if (!sb.isNull() && strncmp(sb.id(), id, 3) == 0) { return sb; } i += subblockSize + 8; } return {}; } int Vendor0080BLBlock::subblockOffset(const Uic9183Block& block) { const auto certCount = *(block.data() + 14) - '0'; const auto certSize = block.version() == 2 ? 46 : 26; return 15 + certSize * certCount + 2; } Uic9183Block Uic9183ParserPrivate::findBlock(const char name[6]) const { // 6x header name // 2x block version // 4x block size as string, including the header for (int i = 0; i < m_payload.size() - 12;) { const int blockSize = m_payload.mid(i + 8, 4).toInt(); if (blockSize + i > m_payload.size()) { qCWarning(Log) << "UIC 918-3 block size exceeds payload size."; return {}; } if (strncmp(name, m_payload.data() + i, 6) == 0) { return {m_payload.data() + i, blockSize}; } i += blockSize; } return {}; } Rct2TicketField::Rct2TicketField(const char *data, int size) : m_data(data) , m_size(size) { if (size <= 13) { // too small qCWarning(Log) << "Found too small RCT2 field:" << size; m_data = nullptr; return; } // invalid format if (!std::all_of(data, data + 8, isdigit) || !std::all_of(data + 9, data + 13, isdigit)) { qCWarning(Log) << "Found RCT2 field with invalid format"; m_data = nullptr; return; } // size is too large if (this->size() > m_size) { qCWarning(Log) << "Found 
RCT2 field with invalid size" << this->size() << m_size; m_data = nullptr; return; } } bool Rct2TicketField::isNull() const { return !m_data || m_size <= 13; } int Rct2TicketField::size() const { return asciiToInt(m_data + 9, 4) + 13; } int Rct2TicketField::row() const { return asciiToInt(m_data, 2); } int Rct2TicketField::column() const { return asciiToInt(m_data + 2, 2); } int Rct2TicketField::height() const { return asciiToInt(m_data + 4, 2); } int Rct2TicketField::width() const { return asciiToInt(m_data + 6, 2); } QString Rct2TicketField::text() const { return QString::fromUtf8(m_data + 13, asciiToInt(m_data + 9, 4)); } Rct2TicketField Rct2TicketField::next() const { const auto thisSize = size(); const auto remaining = m_size - size(); if (remaining < 0) { return {}; } // search for the next field // in theory this should always trigger at i == 0, unless // the size field is wrong, which happens unfortunately for (int i = 0; i < remaining - 13; ++i) { Rct2TicketField f(m_data + thisSize + i, remaining - i); if (!f.isNull()) { return f; } } return {}; } Rct2TicketField Rct2TicketPrivate::firstField() const { if (block.size() > 20) { return Rct2TicketField(block.data() + 20, block.size() - 20); } return {}; } QString Rct2TicketPrivate::fieldText(int row, int column, int width, int height) const { QString s; for (auto f = firstField(); !f.isNull(); f = f.next()) { if (f.row() + f.height() - 1 < row || f.row() > row + height - 1) { continue; } if (f.column() + f.width() - 1 < column || f.column() > column + width - 1) { continue; } //qDebug() << "Field:" << f.height() << f.column() << f.height() << f.width() << f.size() << f.text(); // split field into lines // TODO this needs to follow the RCT2 word-wrapping algorithm? 
const auto content = f.text(); const auto lines = content.splitRef(QLatin1Char('\n')); // cut out the right part of the line for (int i = 0; i < lines.size(); ++i) { if (f.row() + i < row) { continue; } if (f.row() + i > row + height - 1) { break; } // TODO also truncate by w const auto offset = column - f.column(); if (offset >= 0) { s += lines.at(i).mid(offset).left(width); } else { s += lines.at(i); // TODO left padding by offset, truncate by width + offset } } } //qDebug() << "Result:" << x << y << w << h << s; return s; } QDate Rct2TicketPrivate::firstDayOfValidity() const { const auto f = fieldText(3, 1, 48); const auto it = std::find_if(f.begin(), f.end(), [](QChar c) { return c.isDigit(); }); if (it == f.end()) { return {}; } const auto dtStr = f.midRef(std::distance(f.begin(), it)); auto dt = QDate::fromString(dtStr.left(10).toString(), QStringLiteral("dd.MM.yyyy")); if (dt.isValid()) { return dt; } dt = QDate::fromString(dtStr.left(8).toString(), QStringLiteral("dd.MM.yy")); if (dt.isValid()) { if (dt.year() < 2000) { dt.setDate(dt.year() + 100, dt.month(), dt.day()); } return dt; } dt = QDate::fromString(dtStr.left(4).toString(), QStringLiteral("yyyy")); return dt; } QDateTime Rct2TicketPrivate::parseTime(const QString &dateStr, const QString &timeStr) const { const auto d = QDate::fromString(dateStr, QStringLiteral("dd.MM")); const auto t = QTime::fromString(timeStr, QStringLiteral("hh:mm")); return QDateTime({firstDayOfValidity().year(), d.month(), d.day()}, t); } // 6x "U_TLAY" // 2x version (always "01") // 4x record length, numbers as ASCII text // 4x ticket layout type ("RCT2") // 4x field count // Nx fields (see Rct2TicketField) Rct2Ticket::Rct2Ticket() : d(new Rct2TicketPrivate) { } Rct2Ticket::Rct2Ticket(Uic9183Block block) : d(new Rct2TicketPrivate) { d->block = block; qDebug() << QByteArray(block.data(), block.size()); } Rct2Ticket::Rct2Ticket(const Rct2Ticket&) = default; Rct2Ticket::~Rct2Ticket() = default; Rct2Ticket& 
Rct2Ticket::operator=(const Rct2Ticket&) = default; /* A ticket is valid when its block is non-null, longer than 34 bytes, has version "01" at offset 6 and layout type "RCT2" at offset 12. */ bool Rct2Ticket::isValid() const { return !d->block.isNull() && d->block.size() > 34 && std::strncmp(d->block.data() + 6, "01", 2) == 0 && std::strncmp(d->block.data() + 12, "RCT2", 4) == 0; } +void Rct2Ticket::setContextDate(const QDateTime &contextDt) +{ + d->contextDt = contextDt; +} + QDate Rct2Ticket::firstDayOfValidity() const { return d->firstDayOfValidity(); } /* All outbound accessors below read row 6 of the layout through fieldText(row, column, width); the station and class texts are additionally trimmed. */ QDateTime Rct2Ticket::outboundDepartureTime() const { return d->parseTime(d->fieldText(6, 1, 5), d->fieldText(6, 7, 5)); } QDateTime Rct2Ticket::outboundArrivalTime() const { return d->parseTime(d->fieldText(6, 52, 5), d->fieldText(6, 58, 5)); } QString Rct2Ticket::outboundDepartureStation() const { return d->fieldText(6, 13, 17).trimmed(); } QString Rct2Ticket::outboundArrivalStation() const { return d->fieldText(6, 34, 17).trimmed(); } QString Rct2Ticket::outboundClass() const { return d->fieldText(6, 66, 5).trimmed(); } Uic9183Parser::Uic9183Parser() : d(new Uic9183ParserPrivate) { } Uic9183Parser::Uic9183Parser(const Uic9183Parser&) = default; Uic9183Parser::~Uic9183Parser() = default; Uic9183Parser& Uic9183Parser::operator=(const Uic9183Parser&) = default; +void Uic9183Parser::setContextDate(const QDateTime &contextDt) +{ + d->m_contextDt = contextDt; +} + void Uic9183Parser::parse(const QByteArray &data) { /* Drops any previous payload first; each early return in the checks below therefore leaves the payload empty. */ d->m_payload.clear(); // header and signature block (64 byte total) if (!Uic9183Parser::maybeUic9183(data)) { qCWarning(Log) << "UIC 918-3 ticket too short or has wrong header/version."; return; } // 3x header // 2x version // 4x UIC code of the signing carrier // 5x signature key id // 50x ASN.1 signature // zlib compressed payload if (data.size() < 64 + 8) { qCWarning(Log) << "UIC 918-3 payload too short."; return; } // 4x compressed payload size as string // 2x zlib header 0x789C if (data[68] != 0x78 || ((uchar)data[69] != 0x9C && (uchar)data[69] != 0xDA)) { qCWarning(Log) << "UIC 918.3 payload has wrong zlib header."; return; } // nx zlib
payload d->m_payload.resize(4096); z_stream stream; stream.zalloc = nullptr; stream.zfree = nullptr; stream.opaque = nullptr; stream.avail_in = data.size() - 68; stream.next_in = reinterpret_cast(const_cast(data.data() + 68)); stream.avail_out = d->m_payload.size(); stream.next_out = reinterpret_cast(d->m_payload.data()); inflateInit(&stream); const auto res = inflate(&stream, Z_NO_FLUSH); switch (res) { case Z_OK: case Z_STREAM_END: break; // all good default: qCWarning(Log) << "UIC 918.3 payload zlib decompression failed" << stream.msg; return; } inflateEnd(&stream); d->m_payload.truncate(d->m_payload.size() - stream.avail_out); //qCDebug(Log) << res << d->m_payload << stream.avail_out; } bool Uic9183Parser::isValid() const { return !d->m_payload.isEmpty(); } // U_HEAD (version 1, size 53) // 4x issuing carrier id // 6x PNR // 20x unique ticket key // 12x issuing date/time as ddMMyyyyHHMM, as UTC // 1x flags // 2x ticket language // 2x secondary ticket language QString Uic9183Parser::pnr() const { const auto b = d->findBlock("U_HEAD"); if (b.isNull() || b.version() != 1 || b.size() != 53) { return {}; } return QString::fromUtf8(b.data() + 16, 6); } QString Uic9183Parser::carrierId() const { const auto b = d->findBlock("U_HEAD"); if (b.isNull() || b.version() != 1 || b.size() != 53) { return {}; } return QString::fromUtf8(b.data() + 12, 4); } Person Uic9183Parser::person() const { // Deutsche Bahn vendor block const auto b = Vendor0080BLBlock(d->findBlock("0080BL")); if (b.isValid()) { // S028 contains family and given name separated by a '#', UTF-8 encoded auto sblock = b.findSubBlock("028"); if (!sblock.isNull()) { const auto endIt = sblock.data() + sblock.size(); auto it = std::find(sblock.data(), endIt, '#'); if (it != endIt) { Person p; p.setGivenName(QString::fromUtf8(sblock.data(), std::distance(sblock.data(), it))); ++it; p.setFamilyName(QString::fromUtf8(it, std::distance(it, endIt))); return p; } } // S023 contains the full name, UTF-8 encoded sblock = 
b.findSubBlock("023"); if (!sblock.isNull()) { Person p; p.setName(sblock.toString()); return p; } } // RCT2 tickets const auto rct2 = rct2Ticket(); if (rct2.isValid()) { const auto name = rct2.d->fieldText(0, 52, 19); if (!name.isEmpty()) { Person p; p.setName(name); return p; } } return {}; } /* Overlays the S035 sub-block text (at most 7 characters) onto the tail of "ibnr:8000000"; returns an empty string when the vendor block or sub-block is missing. */ QString Uic9183Parser::outboundDepartureStationId() const { const auto b = Vendor0080BLBlock(d->findBlock("0080BL")); if (b.isValid()) { // S035 contains the IBNR, possibly with leading '80' country code and leading 0 stripped const auto sblock = b.findSubBlock("035"); if (!sblock.isNull() && sblock.size() <= 7) { QString ibnr = QStringLiteral("ibnr:8000000"); const auto s = sblock.toString(); return ibnr.replace(ibnr.size() - s.size(), s.size(), s); } } return {}; } /* Same as outboundDepartureStationId(), but reads the S036 sub-block. */ QString Uic9183Parser::outboundArrivalStationId() const { const auto b = Vendor0080BLBlock(d->findBlock("0080BL")); if (b.isValid()) { // S036 contains the IBNR, possibly with leading '80' country code and leading 0 stripped const auto sblock = b.findSubBlock("036"); if (!sblock.isNull() && sblock.size() <= 7) { QString ibnr = QStringLiteral("ibnr:8000000"); const auto s = sblock.toString(); return ibnr.replace(ibnr.size() - s.size(), s.size(), s); } } return {}; } /* Builds the ticket from the U_TLAY block; the added lines also hand the parser's context date to it. */ Rct2Ticket Uic9183Parser::rct2Ticket() const { - return Rct2Ticket(d->findBlock("U_TLAY")); + Rct2Ticket rct2(d->findBlock("U_TLAY")); + rct2.setContextDate(d->m_contextDt); + return rct2; } /* Returns the ticket wrapped in a QVariant when it is valid, and a null QVariant otherwise. */ QVariant Uic9183Parser::rct2TicketVariant() const { const auto rct2 = rct2Ticket(); if (rct2.isValid()) { return QVariant::fromValue(rct2); } return {}; } /* Accepts data of at least 64 bytes that starts with "#UT" or "OTI" followed by the characters '0' and '1'. */ bool Uic9183Parser::maybeUic9183(const QByteArray& data) { if (data.size() < 64) { return false; } if (!data.startsWith("#UT") && !data.startsWith("OTI")) { return false; } if (data.at(3) != '0' || data.at(4) != '1') { return false; } return true; } #include "moc_uic9183parser.cpp" diff --git a/src/uic9183parser.h b/src/uic9183parser.h index 5fc9fca..1614b71 100644 --- a/src/uic9183parser.h +++
b/src/uic9183parser.h @@ -1,136 +1,144 @@ /* Copyright (C) 2018 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef KITINERARY_UIC9183PARSER_H #define KITINERARY_UIC9183PARSER_H #include "kitinerary_export.h" #include #include #include class QDateTime; namespace KItinerary { class Rct2TicketPrivate; class Uic9183Block; class Uic9183Parser; /** RCT2 ticket layout payload of a UIC 918.3 ticket token. */ class KITINERARY_EXPORT Rct2Ticket { Q_GADGET Q_PROPERTY(QDate firstDayOfValidity READ firstDayOfValidity) Q_PROPERTY(QDateTime outboundDepartureTime READ outboundDepartureTime) Q_PROPERTY(QDateTime outboundArrivalTime READ outboundArrivalTime) Q_PROPERTY(QString outboundDepartureStation READ outboundDepartureStation) Q_PROPERTY(QString outboundArrivalStation READ outboundArrivalStation) Q_PROPERTY(QString outboundClass READ outboundClass) public: Rct2Ticket(); Rct2Ticket(const Rct2Ticket&); ~Rct2Ticket(); Rct2Ticket& operator=(const Rct2Ticket&); /** Returns whether this is a valid RCT2 ticket layout block. */ bool isValid() const; + /** Date/time this ticket was first encountered, to recover possibly missing year numbers. */ + void setContextDate(const QDateTime &contextDt); + /** First day the ticket is valid. */ QDate firstDayOfValidity() const; /** Departure time of the outbound segment. */ QDateTime outboundDepartureTime() const; /** Arrival time of the outbound segment.
*/ QDateTime outboundArrivalTime() const; /** Departure station of the outbound segment. */ QString outboundDepartureStation() const; /** Arrival station of the outbound segment. */ QString outboundArrivalStation() const; /** Class of the outbound segment. */ QString outboundClass() const; private: friend class Uic9183Parser; Rct2Ticket(Uic9183Block block); QExplicitlySharedDataPointer d; }; class Uic9183ParserPrivate; /** Parser for UIC 918.3 and 918.3* train tickets. * * @see http://www.era.europa.eu/Document-Register/Documents/ERA_Technical_Document_TAP_B_7_v1.2.pdf * for information about the general UIC 918-3 structure * @see http://www.era.europa.eu/Document-Register/Documents/ERA_Technical_Document_TAP_B_6_v1_2.pdf * for information about the U_TLAY block * @see https://www.bahn.de/p/view/angebot/regio/barcode.shtml * for information about the 0080VU vendor block */ class KITINERARY_EXPORT Uic9183Parser { Q_GADGET Q_PROPERTY(QString pnr READ pnr) Q_PROPERTY(QString carrierId READ carrierId) Q_PROPERTY(KItinerary::Person person READ person) Q_PROPERTY(QString outboundDepartureStationId READ outboundDepartureStationId) Q_PROPERTY(QString outboundArrivalStationId READ outboundArrivalStationId) /** RCT2 ticket layout block, if present, @c null otherwise. */ Q_PROPERTY(QVariant rct2Ticket READ rct2TicketVariant) public: Uic9183Parser(); Uic9183Parser(const Uic9183Parser&); ~Uic9183Parser(); Uic9183Parser& operator=(const Uic9183Parser&); + /** Date/time this ticket was first encountered. + * This is used to recover a missing year in the ticket data. + */ + void setContextDate(const QDateTime &contextDt); + void parse(const QByteArray &data); /** Returns whether a non-empty payload is available. */ bool isValid() const; /** The booking reference. */ QString pnr() const; /** The UIC carrier code. */ QString carrierId() const; /** The person this ticket is issued to. */ Person person() const; /** Station identifier for the departure station of the outbound trip.
*/ QString outboundDepartureStationId() const; /** Station identifier for the arrival station of the outbound trip. */ QString outboundArrivalStationId() const; /** RCT2 ticket layout, if present. */ Rct2Ticket rct2Ticket() const; /** Quickly checks if @p data might be UIC 918.3 content. * This prioritizes speed over correctness and is used in barcode content auto-detection. */ static bool maybeUic9183(const QByteArray &data); private: QVariant rct2TicketVariant() const; QExplicitlySharedDataPointer d; }; } Q_DECLARE_METATYPE(KItinerary::Rct2Ticket) Q_DECLARE_METATYPE(KItinerary::Uic9183Parser) #endif // KITINERARY_UIC9183PARSER_H