diff --git a/src/extractors/deutschebahn.js b/src/extractors/deutschebahn.js
index b56bf96..a58835b 100644
--- a/src/extractors/deutschebahn.js
+++ b/src/extractors/deutschebahn.js
@@ -1,200 +1,200 @@
 /*
    Copyright (c) 2017 Volker Krause
 
    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
 
    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 */
 
+// Returns true when `line` contains one of the itinerary header or page
+// footer markers listed in the pattern below.
 function isHeaderOrFooter(line) {
     return line.search(/(Ihre Reiseverbindung|Wichtige Nutzungshinweise|Hinweise:|Seite \d \/ \d)/) >= 0;
 }
 
+// Copies the coach ("Wg. <n>") and seat ("Pl. <n ...>") numbers found in
+// `text` into res.reservedTicket.ticketedSeat. A field without a match is
+// left untouched.
 function parseSeat(res, text) {
     var coach = text.match(/Wg. (\d+)/);
     if (coach)
         res.reservedTicket.ticketedSeat.seatSection = coach[1];
     var seat = text.match(/Pl. ([\d ]+\d)/);
     if (seat)
         res.reservedTicket.ticketedSeat.seatNumber = seat[1];
 }
 
+// Parses a departure line ("<station> dd.MM. ab hh:mm", optionally followed
+// by platform, train id and seat information) into `res`.
+// `year` is appended to the day/month found on the line; `compact` selects
+// the train id pattern that stops at a comma instead of at a space.
+// Returns false, without modifying `res`, when `line` is not a departure line.
 function parseDeparture(res, line, year, compact) {
     var dep = line.match(/^(.+?) *([0-9]{2})\.([0-9]{2})\. +ab ([0-9]{2}:[0-9]{2})/);
     if (!dep)
         return false;
 
     res.reservationFor.departureStation.name = dep[1];
     res.reservationFor.departureTime = JsonLd.toDateTime(dep[2] + ' ' + dep[3] + ' ' + year + ' ' + dep[4], "dd MM yyyy hh:mm", "de");
+    // idx tracks how much of the line has been consumed so far
     var idx = dep.index + dep[0].length;
     var platform = line.substr(idx).match(/^ {1,3}(.*?)(?=( | IC|$))/);
     if (platform) {
         idx += platform.index + platform[0].length;
         res.reservationFor.departurePlatform = platform[1];
     }
     var trainId = line.substr(idx).match(compact ? / +([^,]*?)(?=(,|$))/ : / +(.*?)(?=( |$))/);
     if (trainId) {
         idx += trainId.index + trainId[0].length
         res.reservationFor.trainNumber = trainId[1];
     }
     parseSeat(res, line.substr(idx));
     return true;
 }
 
+// Parses an arrival line ("<station> dd.MM. an hh:mm", optionally followed
+// by platform and seat information) into `res`.
+// Returns false, without modifying `res`, when `line` is not an arrival line.
 function parseArrival(res, line, year) {
     var arr = line.match(/^(.+?) *([0-9]{2})\.([0-9]{2})\. +an ([0-9]{2}:[0-9]{2})/);
     if (!arr)
         return false;
 
     res.reservationFor.arrivalStation.name = arr[1];
     res.reservationFor.arrivalTime = JsonLd.toDateTime(arr[2] + ' ' + arr[3] + ' ' + year + ' ' + arr[4], "dd MM yyyy hh:mm", "de");
     var idx = arr.index + arr[0].length;
     var platform = line.substr(idx).match(/^ {1,3}(.*?)(?=( | IC|$))/);
     if (platform) {
         idx += platform.index + platform[0].length;
         res.reservationFor.arrivalPlatform = platform[1];
     }
     parseSeat(res, line.substr(idx));
     return true;
 }
 
+// Splits `text` into lines and builds one train reservation per
+// departure/arrival pair. Lines between a departure and its arrival, and
+// lines after an arrival up to the next departure, are treated as
+// continuations: their leading text is appended to the station name and
+// they are scanned for seat information.
+// In compact mode the indentation of the first line is removed from every line.
+// Returns the reservations collected up to the first header/footer line or
+// the end of the text.
 function parseLegs(text, year, compact) {
     var reservations = new Array();
     var lines = text.split('\n');
     var offset = lines[0].match(/^ */);
     for (var i = 0; compact && i < lines.length; ++i)
         lines[i] = lines[i].substr(offset[0].length);
 
     for (var i = 0; i < lines.length;) {
         // stop when reaching the footer or the next itinerary header
         if (isHeaderOrFooter(lines[i]))
             return reservations;
 
         var res = JsonLd.newTrainReservation();
         while (i < lines.length && !isHeaderOrFooter(lines[i])) {
             if (parseDeparture(res, lines[i++], year, compact))
                 break;
         }
         while (i < lines.length && !isHeaderOrFooter(lines[i])) {
             if (parseArrival(res, lines[i], year)) {
                 ++i;
                 break;
             }
             // continuation of departure line
             var depStation = lines[i].match(/^(\S.*?)(?: |\n|$)/)
             if (depStation)
                 res.reservationFor.departureStation.name = res.reservationFor.departureStation.name + " " + depStation[1];
             parseSeat(res, lines[i]);
             ++i;
         }
 
         // handle continuations of the arrival line
         while (i < lines.length && !isHeaderOrFooter(lines[i])) {
             if (lines[i].match(/^\S.+? *[0-9]{2}\.[0-9]{2}\. +ab [0-9]{2}:[0-9]{2}/)) // next departure line
                 break;
             // continuation of arrival line
             var arrStation = lines[i].match(/^(\S.*?)(?: |\n|$)/)
             if (arrStation)
                 res.reservationFor.arrivalStation.name = res.reservationFor.arrivalStation.name + " " + arrStation[1];
             parseSeat(res, lines[i]);
             ++i;
         }
 
         if (res.reservationFor.arrivalStation != undefined) {
             reservations.push(res);
         } else {
             ++i;
         }
     }
     return reservations;
 }
 
+// Entry point without barcode data; forwards to parseTicket with no UIC 918-3 ticket.
 function parseText(text) { // used by unit tests
     return parseTicket(text, null);
 }
 
+// Extracts all train reservations from the text of a ticket.
+// For every "Ihre Reiseverbindung ... Hin|Rückfahrt am <date>" header the
+// legs following the reservation table header are parsed with parseLegs().
+// `uic918ticket` may be null; when given, its outbound station ids are
+// attached to the first departure and last arrival of the outward journey,
+// and return-journey stations with matching names inherit those ids.
+// The booking reference, when found, is set on every reservation.
+// Returns the (possibly empty) array of reservations.
 function parseTicket(text, uic918ticket) {
     var reservations = new Array();
     var pos = 0;
     var returnResIndex = 0;
     while (true) {
         // find itinerary headers
         var header = text.substr(pos).match(/Ihre Reiseverbindung[\S ]+(Hin|Rück)fahrt am [0-9]{2}.[0-9]{2}.([0-9]{4}).*\n/);
         if (!header)
             break;
         var idx = header.index + header[0].length;
         var year = header[2];
 
         // determine ticket type
         var domesticHeader = text.substr(pos + idx).match(/ Reservierung\n/);
         var intlHeader = text.substr(pos + idx).match(/(Produkte\/Reservierung|Fahrt\/Reservierung).*\n/);
         if (domesticHeader) {
             idx += domesticHeader.index + domesticHeader[0].length;
             reservations = reservations.concat(parseLegs(text.substr(pos + idx), year, false));
         } else if (intlHeader) {
             idx += intlHeader.index + intlHeader[0].length;
             reservations = reservations.concat(parseLegs(text.substr(pos + idx), year, true));
         } else {
             break;
         }
 
         // for outward journeys we have station ids from the UIC 918-3 code
-        if (uic918ticket && header[1] === "Hin") {
+        // (only when at least one leg was parsed, reservations[0] does not exist otherwise)
+        if (uic918ticket && header[1] === "Hin" && reservations.length > 0) {
             reservations[0].reservationFor.departureStation.identifier = uic918ticket.outboundDepartureStationId;
             reservations[reservations.length - 1].reservationFor.arrivalStation.identifier = uic918ticket.outboundArrivalStationId;
             returnResIndex = reservations.length;
         } else {
             // propagate station ids from outward to return journey
             for (var i = returnResIndex; i < reservations.length; ++i) {
                 for (var j = 0; j < returnResIndex; ++j) {
                     if (reservations[i].reservationFor.departureStation.name === reservations[j].reservationFor.arrivalStation.name)
                         reservations[i].reservationFor.departureStation.identifier = reservations[j].reservationFor.arrivalStation.identifier;
                     if (reservations[i].reservationFor.arrivalStation.name === reservations[j].reservationFor.departureStation.name)
                         reservations[i].reservationFor.arrivalStation.identifier = reservations[j].reservationFor.departureStation.identifier;
                 }
             }
         }
+        // guard against an endless loop if nothing was consumed
         if (idx == 0)
             break;
         pos += idx;
     }
 
     // international tickets have the booking reference somewhere on the side, so we don't really know
     // where it is relative to the itinerary
     var bookingRef = text.match(/(?:Auftragsnummer|Auftrag \(NVS\)):\s*([A-Z0-9]{6,9})\n/);
-    for (var i = 0; i < reservations.length; ++i)
+    // match() yields null when the text carries no booking reference; keep the reservations in that case
+    for (var i = 0; bookingRef && i < reservations.length; ++i)
         reservations[i].reservationNumber = bookingRef[1];
     return reservations;
 }
 
+// PDF entry point: parses the text of the page selected by
+// Context.pdfPageNumber and enriches every reservation with data from the
+// decoded UIC 918-3 barcode (ticket token, seating type, traveller) and,
+// when available, the ticket name from previously extracted Context.data.
 function parsePdf(pdf) {
     var page = pdf.pages[Context.pdfPageNumber];
     var uic918ticket = Barcode.decodeUic9183(Context.barcode);
     var reservations = parseTicket(page.text, uic918ticket);
     for (var i = 0; i < reservations.length; ++i) {
         reservations[i].reservedTicket.ticketToken = "aztecbin:" + Barcode.toBase64(Context.barcode);
         reservations[i].reservedTicket.ticketedSeat.seatingType = uic918ticket.seatingType;
-        if (Context.data.length > 0) {
+        if (Context.data && Context.data.length > 0) {
             reservations[i].reservedTicket.name = Context.data[0].reservedTicket.name;
         }
         reservations[i].underName = JsonLd.toJson(uic918ticket.person);
     }
     return reservations;
 }
diff --git a/src/extractors/regiojet.js b/src/extractors/regiojet.js
index 8e39fb0..5398df1 100644
--- a/src/extractors/regiojet.js
+++ b/src/extractors/regiojet.js
@@ -1,217 +1,217 @@
 /*
    Copyright (c) 2017 Volker Krause
    Copyright (c) 2018 Daniel Vrátil
 
    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB. If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 */
 
+// Per-locale patterns: ticket id, trip section headers, the table column
+// titles (in Columns order) and the date format used in the table.
 var regExMap = new Array();
 regExMap["cs_CZ"] = new Array();
 regExMap["cs_CZ"]["ticketId"] = /Elektronická jízdenka č\.\s+([0-9]+)/;
 regExMap["cs_CZ"]["singleTripHeader"] = /Cesta/;
 regExMap["cs_CZ"]["thereTripHeader"] = /Cesta tam/;
 regExMap["cs_CZ"]["returnTripHeader"] = /Cesta zpět/;
 regExMap["cs_CZ"]["columns"] = [ /Datum/, /Zastávka\/Přestup/, /Příjezd/, /Odjezd/, /Nást\./, /Spoj/, /Vůz\/sedadla/ ];
-regExMap["cs_CZ"]["date"] = /([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{2})/;
+regExMap["cs_CZ"]["date"] = /([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{2,4}$)/;
 regExMap["en_US"] = new Array();
 regExMap["en_US"]["ticketId"] = /Electronic ticket\s+([0-9]+)/;
 regExMap["en_US"]["singleTripHeader"] = /Route/;
 regExMap["en_US"]["thereTripHeader"] = /Route there/;
 regExMap["en_US"]["returnTripHeader"] = /Route back/;
 regExMap["en_US"]["columns"] = [ /Date/, /Station\/Transfer/, /Arrival/, /Departure/, /Platf\./, /Connection/, /Coach\/Seats/ ];
-regExMap["en_US"]["date"] = /([0-9]{2})\/([0-9]{2})\/([0-9]{2})/;
+regExMap["en_US"]["date"] = /([0-9]{2})\/([0-9]{2})\/([0-9]{2,4}$)/;
 
+// Left-pads the string `s` with '0' until it is at least two characters long.
 function padDigit(s) {
     while (s.length < 2) {
         s = '0' + s;
     }
     return s;
 }
 
+// Combines a date string (in the locale's table format) and an "h:mm" time
+// string into a date/time value via JsonLd.toDateTime.
+// Returns null when either string does not match its pattern.
 function parseDate(date, time, locale) {
     var d = date.match(regExMap[locale]["date"]);
     var t = time.match(/([0-9]{1,2}):([0-9]{1,2})/);
     if (!d || !t) {
         return null;
     }
-    return JsonLd.toDateTime(padDigit(d[1]) + "." + padDigit(d[2]) + ".20" + d[3] + " " + padDigit(t[1]) + ":" + padDigit(t[2]), "dd.MM.yyyy HH:mm", locale);
+    // the date patterns accept 2- to 4-digit years; only the 2-digit form needs the century prepended
+    var year = d[3].length === 2 ? "20" + d[3] : d[3];
+    return JsonLd.toDateTime(padDigit(d[1]) + "." + padDigit(d[2]) + "." + year + " " + padDigit(t[1]) + ":" + padDigit(t[2]), "dd.MM.yyyy HH:mm", locale);
 }
 
+// Indices into the per-locale "columns" pattern list.
 var Columns = Object.freeze({
     Date: 0,
     Station: 1,
     ArrivalTime: 2,
     DepartureTime: 3,
     Platform: 4,
     Connection: 5,
     Seat: 6,
     ColumnCount: 7
 });
 
+// Returns the trimmed text of `line` located under the given column.
+// The column spans from the position of its title in the header line
+// `columns` to the position of the next column's title; the last column
+// extends to the end of the line. Returns "" when a needed title is not
+// found in the header.
 function columnValue(line, columns, column, locale) {
     var start = columns.match(regExMap[locale]["columns"][column]);
     if (!start) {
         return "";
     }
     if (column < Columns.ColumnCount - 1) {
         var end = columns.match(regExMap[locale]["columns"][column + 1]);
         if (!end) {
             return "";
         }
         return line.substr(start.index, end.index - start.index).trim();
     } else {
         return line.substr(start.index).trim();
     }
 }
 
+// Parses one trip table. The first line of `trip` is the column header,
+// each following row is a stop. Every row except the final destination row
+// produces a Bus or Train reservation (Train when the connection number
+// matches "RJ <digits>"), with the arrival taken from the next row.
+// Parsing stops at the row that is followed by an empty line or by the end
+// of the text. Returns the array of reservations.
 function parseTrip(trip, locale) {
     var text = trip.split("\n")
     var columns = text[0];
     var reservations = new Array();
     var transportType = "Bus";
     for (var i = 1; i < text.length; i++) {
         // Skip the destination arrival part, we already populated it as part
         // of completing the previous departure line
-        if (i < text.length - 1 && !text[i + 1]) {
+        // (a missing next line counts as empty too, text[i + 1] must not be read as a row then)
+        if (!text[i + 1]) {
             break;
         }
         var connection = columnValue(text[i], columns, Columns.Connection, locale);
         var number = null;
         var name = null;
         if (connection) {
+            // connection has the form "<name> (<number>)"
             var split = connection.lastIndexOf("(")
             name = connection.substr(0, split - 1);
             number = connection.substr(split + 1, connection.length - split - 2);
             transportType = number.match(/RJ [0-9]+/) ? "Train" : "Bus";
         }
 
         var res = JsonLd.newObject(transportType + "Reservation");
         res.reservationFor = JsonLd.newObject(transportType + "Trip");
         if (transportType == "Bus") {
             if (number) {
                 res.reservationFor.busNumber = number;
             }
             if (name) {
                 res.reservationFor.busName = name;
             }
         } else if (transportType == "Train") {
             if (number) {
                 res.reservationFor.trainNumber = number;
             }
             if (name) {
                 res.reservationFor.trainName = name;
             }
         }
 
+        // arrival data comes from the next row, falling back to the current one
         var arrivalTime = columnValue(text[i + 1], columns, Columns.ArrivalTime, locale);
         var arrivalDate = columnValue(text[i + 1], columns, Columns.Date, locale);
         if (!arrivalTime) {
             arrivalTime = columnValue(text[i], columns, Columns.ArrivalTime, locale);
         }
         if (!arrivalDate) {
             arrivalDate = columnValue(text[i], columns, Columns.Date, locale);
         }
         if (arrivalDate && arrivalTime) {
             res.reservationFor.arrivalStation = JsonLd.newObject(transportType + "Station");
             res.reservationFor.arrivalStation.name = columnValue(text[i+1], columns, Columns.Station, locale);
             res.reservationFor.arrivalTime = parseDate(arrivalDate, arrivalTime, locale);
         }
 
+        // departure data comes from the current row, or from the previous one if this row has no departure time
         var departureTime = columnValue(text[i], columns, Columns.DepartureTime, locale);
         var departure = "";
         if (departureTime !== "") {
             departure = text[i];
         } else if (i > 0) {
             departure = text[i - 1];
             departureTime = columnValue(departure, columns, Columns.DepartureTime, locale);
         }
         if (departure) {
             res.reservationFor.departureStation = JsonLd.newObject(transportType + "Station");
             res.reservationFor.departureStation.name = columnValue(departure, columns, Columns.Station, locale);
             res.reservationFor.departureTime = parseDate(columnValue(departure, columns, Columns.Date, locale), departureTime, locale);
             var platform = columnValue(departure, columns, Columns.Platform, locale);
             if (platform) {
                 res.reservationFor.departurePlatform = platform;
             }
 
-            // seats are always bound to departur
+            // seats are always bound to departure
             var seat = columnValue(departure, columns, Columns.Seat, locale);
             if (seat) {
                 var r = seat.match(/([0-9]+)\/([0-9]+)/);
                 res.reservedTicket = JsonLd.newObject("Ticket");
                 res.reservedTicket.ticketedSeat = JsonLd.newObject("Seat");
                 if (r) {
                     res.reservedTicket.ticketedSeat.seatSection = r[1];
                     res.reservedTicket.ticketedSeat.seatNumber = r[2];
                 } else {
                     res.reservedTicket.ticketedSeat.seatNumber = seat;
                 }
             }
         }
 
         reservations.push(res);
     }
     return reservations;
 }
 
+// Extractor entry point. Tries each locale in regExMap until its ticket id
+// pattern matches, parses the single trip or the there/back trips, and sets
+// the ticket id (and the online ticket URL, when present) on every
+// reservation. Returns an empty array when nothing could be parsed.
 function main(text) {
     var reservations = new Array();
     for (var locale in regExMap) {
         var ticketId = text.match(regExMap[locale]["ticketId"]);
         if (!ticketId) {
             continue;
         }
         var resUrl = text.match(/http(s)?:\/\/jizdenky\.(regiojet|studentagency)\.cz\/OnlineTicket\?pam1=[0-9]+\&pam2=[0-9]+/)
         var returnHeader = text.match(regExMap[locale]["returnTripHeader"]);
         var isReturn = (returnHeader !== null);
         var routeHeader = text.match(regExMap[locale][isReturn ? "thereTripHeader" : "singleTripHeader"]);
         if (!routeHeader) {
             break;
         }
         var trip = text.substr(routeHeader.index + routeHeader[0].length + 1);
         var newRes = parseTrip(trip, locale);
         if (newRes.length === 0) {
             break;
         }
         reservations = reservations.concat(newRes);
         if (isReturn) {
             trip = text.substr(returnHeader.index + returnHeader[0].length + 1);
             reservations = reservations.concat(parseTrip(trip, locale));
         }
         for (var i = 0; i < reservations.length; ++i) {
             reservations[i].reservationNumber = ticketId[1];
             if (resUrl) {
                 reservations[i].modifyReservationUrl = resUrl[0];
             }
         }
         // No need to scan any further locales
         break;
     }
     return reservations;
 }
diff --git a/src/pdf/pdfextractoroutputdevice.cpp b/src/pdf/pdfextractoroutputdevice.cpp
index ddfdb9e..d13e91b 100644
--- a/src/pdf/pdfextractoroutputdevice.cpp
+++ b/src/pdf/pdfextractoroutputdevice.cpp
@@ -1,217 +1,217 @@
 /*
    Copyright (C) 2019 Volker Krause
 
    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include "pdfextractoroutputdevice_p.h"
 #include "pdfimage.h"
 #include "pdfimage_p.h"
 #include "popplerutils_p.h"
 
 #include <QDebug>
 
 using namespace KItinerary;
 
 #ifdef HAVE_POPPLER
 PdfExtractorOutputDevice::PdfExtractorOutputDevice()
     : TextOutputDev(nullptr, false, 0, false, false)
 {
 }
 
+// Records a raster image reference (object number/generation, size, pixel
+// format and current transform) in m_images. Images without a valid color
+// map or object reference, or with an unsupported pixel layout, are ignored.
 void PdfExtractorOutputDevice::drawImage(GfxState* state, Object* ref, Stream* str, int width, int height, GfxImageColorMap* colorMap, bool interpolate, PopplerMaskColors* maskColors, bool inlineImg)
 {
     Q_UNUSED(str);
     Q_UNUSED(interpolate);
     Q_UNUSED(maskColors);
     Q_UNUSED(inlineImg);
 
     if (!colorMap || !colorMap->isOk() || !ref || !ref->isRef()) {
         return;
     }
 
     QImage::Format format;
     if (colorMap->getColorSpace()->getMode() == csIndexed) {
         format = QImage::Format_RGB888;
     } else if (colorMap->getNumPixelComps() == 1 && (colorMap->getBits() >= 1 && colorMap->getBits() <= 8)) {
         format = QImage::Format_Grayscale8;
     } else if (colorMap->getNumPixelComps() == 3 && colorMap->getBits() == 8) {
         format = QImage::Format_RGB888;
     } else {
         return;
     }
 
     PdfImage pdfImg;
     pdfImg.d->m_refNum = ref->getRef().num;
     pdfImg.d->m_refGen = ref->getRef().gen;
 #if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 69, 0)
     pdfImg.d->m_colorMap.reset(colorMap->copy());
 #endif
     pdfImg.d->m_sourceHeight = height;
     pdfImg.d->m_sourceWidth = width;
     pdfImg.d->m_width = width;
     pdfImg.d->m_height = height;
     // deal with aspect-ratio changing scaling
+    // (only when the CTM has no off-diagonal components: the larger dimension is stretched to the target ratio)
     const auto sourceAspectRatio = (double)width / (double)height;
     const auto targetAspectRatio = state->getCTM()[0] / -state->getCTM()[3];
     if (!qFuzzyCompare(sourceAspectRatio, targetAspectRatio) && qFuzzyIsNull(state->getCTM()[1]) && qFuzzyIsNull(state->getCTM()[2])) {
         if (targetAspectRatio > sourceAspectRatio) {
             pdfImg.d->m_width = width * targetAspectRatio / sourceAspectRatio;
         } else {
             pdfImg.d->m_height = height * sourceAspectRatio / targetAspectRatio;
         }
     }
     pdfImg.d->m_transform = PopplerUtils::currentTransform(state);
     pdfImg.d->m_format = format;
     m_images.push_back(pdfImg);
 }
 
+// Records the start of a graphics state group in the vector operation list.
 void PdfExtractorOutputDevice::saveState(GfxState *state)
 {
     Q_UNUSED(state);
     m_vectorOps.push_back(VectorOp{VectorOp::PushState, {}, {}});
 }
 
+// Records the end of a graphics state group. A group that contains no
+// operation at all is dropped again instead of being closed.
 void PdfExtractorOutputDevice::restoreState(GfxState *state)
 {
     Q_UNUSED(state);
     if (m_vectorOps.empty()) {
         return;
     }
     const auto &lastOp = *(m_vectorOps.end() -1);
     if (lastOp.type == VectorOp::PushState) {
         m_vectorOps.resize(m_vectorOps.size() - 1);
     } else {
         m_vectorOps.push_back(VectorOp{VectorOp::PopState, {}, {}});
     }
 }
 
+// Only black strokes with a non-zero pen width are recorded.
 static bool isRelevantStroke(const QPen &pen)
 {
     return !qFuzzyCompare(pen.widthF(), 0.0) && pen.color() == Qt::black;
 }
 
+// Records the current path as a stroked path operation if its pen is relevant.
 void PdfExtractorOutputDevice::stroke(GfxState *state)
 {
     const auto pen = PopplerUtils::currentPen(state);
     if (!isRelevantStroke(pen)) {
         return;
     }
 
     const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
     const auto t = PopplerUtils::currentTransform(state);
     m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, pen, QBrush()}});
 }
 
+// Only black fills are recorded.
 static bool isRelevantFill(const QBrush &brush)
 {
     return brush.color() == Qt::black;
 }
 
+// Records the current path as a filled path operation (winding fill rule)
+// if its brush is relevant and its bounding rectangle is not degenerate.
 void PdfExtractorOutputDevice::fill(GfxState *state)
 {
     const auto brush = PopplerUtils::currentBrush(state);
     if (!isRelevantFill(brush)) {
         return;
     }
 
     const auto path = PopplerUtils::convertPath(state->getPath(), Qt::WindingFill);
     const auto b = path.boundingRect();
     if (b.width() == 0 || b.height() == 0) {
         return;
     }
 
     const auto t = PopplerUtils::currentTransform(state);
     m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
 }
 
+// Same as fill(), but converts the path with the odd-even fill rule.
 void PdfExtractorOutputDevice::eoFill(GfxState *state)
 {
     const auto brush = PopplerUtils::currentBrush(state);
     if (!isRelevantFill(brush)) {
         return;
     }
 
     const auto path = PopplerUtils::convertPath(state->getPath(), Qt::OddEvenFill);
     const auto b = path.boundingRect();
     if (b.width() == 0 || b.height() == 0) {
         return;
     }
 
     const auto t = PopplerUtils::currentTransform(state);
     m_vectorOps.push_back(VectorOp{VectorOp::Path, t, {path, QPen(), brush}});
 }
 
+// Post-processes the recorded vector operations: state groups that wrap a
+// single path are unwrapped, then runs of consecutive paths sharing the same
+// transform are collected into one PdfVectorPicture each and handed to
+// addVectorImage(). Paths whose transform differs from the current run are
+// skipped.
 void PdfExtractorOutputDevice::finalize()
 {
     // remove single state groups, then try to merge adjacents paths
     std::vector<VectorOp> mergedOps;
     mergedOps.reserve(m_vectorOps.size());
     for (auto it = m_vectorOps.begin(); it != m_vectorOps.end(); ++it) {
-        if ((*it).type == VectorOp::PushState && std::distance(it, m_vectorOps.end()) >= 2 && (*(it + 1)).type == VectorOp::Path && (*(it + 2)).type == VectorOp::PopState) {
+        // it + 2 is only dereferenceable when at least three elements remain
+        if ((*it).type == VectorOp::PushState && std::distance(it, m_vectorOps.end()) >= 3 && (*(it + 1)).type == VectorOp::Path && (*(it + 2)).type == VectorOp::PopState) {
             ++it;
             mergedOps.push_back(*it);
             ++it;
         } else {
             mergedOps.push_back(*it);
         }
     }
-    qDebug() << m_vectorOps.size() << mergedOps.size();
+    //qDebug() << m_vectorOps.size() << mergedOps.size();
 
     std::vector<decltype(VectorOp::stroke)> strokes;
     QTransform t;
     for (const auto &op : mergedOps) {
         if (op.type == VectorOp::Path) {
             if (t.isIdentity()) {
                 t = op.transform;
             }
             if (t != op.transform) {
-                qDebug() << "diffent transforms for strokes, not supported yet";
+                //qDebug() << "diffent transforms for strokes, not supported yet";
                 continue;
             }
             strokes.push_back(op.stroke);
         } else if (!strokes.empty()) {
             PdfVectorPicture pic;
             pic.setStrokes(std::move(strokes));
             pic.setTransform(t);
             addVectorImage(pic);
+            // a moved-from vector is in an unspecified state; reset it before collecting the next run
+            strokes.clear();
             t = QTransform();
         }
     }
     if (!strokes.empty()) {
         PdfVectorPicture pic;
         pic.setStrokes(std::move(strokes));
         pic.setTransform(t);
         addVectorImage(pic);
     }
 }
 
+// Wraps a vector picture into a PdfImage and appends it to m_images.
+// Pictures with fewer than 400 path elements are discarded.
 void PdfExtractorOutputDevice::addVectorImage(const PdfVectorPicture &pic)
 {
     if (pic.pathElementsCount() < 400) { // not complex enough for a barcode
         return;
     }
 
     PdfImage img;
     img.d->m_height = pic.height();
     img.d->m_width = pic.width();
     img.d->m_sourceHeight = pic.sourceHeight();
     img.d->m_sourceWidth = pic.sourceWidth();
     img.d->m_transform = pic.transform();
     img.d->m_vectorPicture = pic;
     m_images.push_back(img);
 }
 
 #endif