diff --git a/autotests/extractordata/czechrailways/czechrailways_multileg-one-way-group.txt b/autotests/extractordata/czechrailways/czechrailways_multileg-one-way-group.txt new file mode 100644 index 0000000..230a0c0 --- /dev/null +++ b/autotests/extractordata/czechrailways/czechrailways_multileg-one-way-group.txt @@ -0,0 +1,45 @@ + JÍZDENKA A MÍSTENKA + eTiket Osob: 5 + 1154 + Z/FROM/VON @ DO/TO/NACH TŘÍDA/ + CL./KL. + 06/08 17:50 Praha hl.n. @ Jaroměř 07/08 24:00 2 + * * * @ * * * * + Přes: PhaLb,Kolín,Pard.Hl,HradKrHl Km: 143 + Rezervace (místo) + Skupinová jednosměrná (5 osob) + Cena 683 Kč +Doklad je nepřenosný a platí vždy pouze ve spojení s průkazem cestujícího, jehož jméno je uvedeno na jízdence. +Jízdu je nutno nastoupit v 1. den platnosti, nejdříve však v 17:50 hod. +Daňový doklad (informace o ceně) +Tax receipt (price information) / Rechnung (Preisinformation) +Položka Tar. cena Body Cena DPH +Jízdenka 1 683 Kč 683 Kč 10 % +Rezervace 2 0 Kč 0 Kč +Celkem 683 Kč +Datum vystavení/datum platby: 31.7.2019 21:28 Platba: KARTOU/CC +Prodejce: České dráhy, a.s. 30000 Číslo obj.: 39921 835 00 +Praha 1, Nábřeží L.Svobody 1222, PSČ 110 15 DIČ: CZ70994226 +Jízdní řád a rezervace +Timetable and Reservations / Fahrplan und Reservierung +Stanice Odj. / Příj. x w Místo / Seat / Sitzplatz +Praha hl.n. 06.08. 17:50 EC 283 258 41, 42, 44 - 46 +Pardubice hl.n. 06.08. 18:46 +Ref: 544265457956, 545265457957, 547265457959, 548265457960, 540265457961 Údaje pro kontrolu + Control data +Pardubice hl.n. 06.08. 19:03 R 1262 1 21 - 24, 28 + Doklad číslo / Document no.: +Hradec Králové hl.n. 06.08. 19:23 +Ref: 541265457962 + *9-2144-549 + Jméno / Name: +Hradec Králové hl.n. 06.08. 19:33 Bus 501262 + Daniel Vrátil +Jaroměř 06.08. 19:54 + Kód transakce / Transaction code: + XUBC00 +Jízdní řád má pouze informativní charakter. + Kód nepřehýbejte! / Do not fold the barcode! + Barcode nicht knicken! +Reklama +České dráhy. Lepší cesta každý den. 
Strana 1/1 diff --git a/autotests/extractordata/czechrailways/czechrailways_multileg-one-way-group.txt.json b/autotests/extractordata/czechrailways/czechrailways_multileg-one-way-group.txt.json new file mode 100644 index 0000000..3a46804 --- /dev/null +++ b/autotests/extractordata/czechrailways/czechrailways_multileg-one-way-group.txt.json @@ -0,0 +1,75 @@ +[ + { + "@context": "http://schema.org", + "@type": "TrainReservation", + "reservationFor": { + "@type": "TrainTrip", + "arrivalStation": { + "@type": "TrainStation", + "name": "Pardubice hl.n." + }, + "arrivalTime": "2019-08-06T18:46:00", + "departureDay": "2019-08-06", + "departureStation": { + "@type": "TrainStation", + "name": "Praha hl.n." + }, + "departureTime": "2019-08-06T17:50:00", + "trainNumber": "EC 283" + }, + "reservedTicket": { + "@type": "Ticket", + "ticketedSeat": { + "@type": "Seat", + "seatNumber": "41", + "seatSection": "258" + } + } + }, + { + "@context": "http://schema.org", + "@type": "TrainReservation", + "reservationFor": { + "@type": "TrainTrip", + "arrivalStation": { + "@type": "TrainStation", + "name": "Hradec Králové hl.n." + }, + "arrivalTime": "2019-08-06T19:23:00", + "departureDay": "2019-08-06", + "departureStation": { + "@type": "TrainStation", + "name": "Pardubice hl.n." + }, + "departureTime": "2019-08-06T19:03:00", + "trainNumber": "R 1262" + }, + "reservedTicket": { + "@type": "Ticket", + "ticketedSeat": { + "@type": "Seat", + "seatNumber": "21", + "seatSection": "1" + } + } + }, + { + "@context": "http://schema.org", + "@type": "TrainReservation", + "reservationFor": { + "@type": "TrainTrip", + "arrivalStation": { + "@type": "TrainStation", + "name": "Jaroměř" + }, + "arrivalTime": "2019-08-06T19:54:00", + "departureDay": "2019-08-06", + "departureStation": { + "@type": "TrainStation", + "name": "Hradec Králové hl.n." 
+ }, + "departureTime": "2019-08-06T19:33:00", + "trainNumber": "Bus 501262" + } + } +] diff --git a/src/extractors/czechrailways.js b/src/extractors/czechrailways.js index 2f2fb80..774a566 100644 --- a/src/extractors/czechrailways.js +++ b/src/extractors/czechrailways.js @@ -1,155 +1,161 @@ /* Copyright (c) 2017 Volker Krause Copyright (c) 2018 Daniel Vrátil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
/* End of the license header; the extractor implementation follows. */

// Returns true when `line` contains one of the known itinerary header or
// footer phrases, i.e. a line at which leg parsing has to stop.
function isHeaderOrFooter(line) {
    return line.search(/(Jízdní řád( a rezervace)?|Jízdenku lze použít po stejné trase|Jízdní doklad zakoupený u obchodníka)/) >= 0;
}

// Makes sure `res.reservedTicket.ticketedSeat` exists, creating the Ticket
// and Seat objects on demand.
function createSeat(res) {
    if (!res.reservedTicket)
        res.reservedTicket = JsonLd.newObject("Ticket");
    if (!res.reservedTicket.ticketedSeat)
        res.reservedTicket.ticketedSeat = JsonLd.newObject("Seat");
}

// Reads up to two whitespace-prefixed numbers from `text`: the first one is
// stored as the seat section (coach), the one following it as the seat number.
// Nothing is created on `res` when no number is found.
function parseSeat(res, text) {
    var coach = text.match(/(\s+)(\d+)/);
    var idx = 0;
    if (coach) {
        createSeat(res);
        res.reservedTicket.ticketedSeat.seatSection = coach[2];
        // continue right behind the coach number
        idx = coach.index + coach[0].length;
    }

    var seat = text.slice(idx).match(/\s+(\d+)/);
    if (seat) {
        createSeat(res);
        res.reservedTicket.ticketedSeat.seatNumber = seat[1];
    }
}

// There's no validity year anywhere in the ticket, so we take the purchase date and
// if the trip month and day are on or after the purchase month and day we assume
// the ticket will become valid the same year it was purchased, otherwise we assume
// the ticket is for next year.
// This fails when you buy the ticket more than a year ahead of the trip, but I doubt
// you can even do that with Czech Railways...
//
// tripDate:     regex match with the day in group 1 and the month in group 2
// purchaseDate: regex match with day, month and year in groups 2, 3 and 4
function detectYear(tripDate, purchaseDate) {
    // Explicit radix: the values are zero-padded ("08") and must be read as decimal.
    var tripDay = parseInt(tripDate[1], 10);
    var tripMonth = parseInt(tripDate[2], 10);
    var purchaseDay = parseInt(purchaseDate[2], 10);
    var purchaseMonth = parseInt(purchaseDate[3], 10);
    var purchaseYear = parseInt(purchaseDate[4], 10);

    if ((purchaseMonth < tripMonth) || ((purchaseMonth === tripMonth) && (purchaseDay <= tripDay))) {
        return purchaseYear;
    }
    return purchaseYear + 1;
}

// Shared parser for the leading "<station> <dd.MM.> <hh:mm>" part of a
// timetable line. Stores a new TrainStation in trip[stationProperty] and, when
// a date/time is found and the purchase date is known, the timestamp in
// trip[timeProperty].
// Returns the offset in `line` behind the consumed part, or -1 when no station
// name could be read.
// NOTE(review): the station pattern stops at the first space, which truncates
// names such as "Praha hl.n."; confirm the intended column separator against
// the repository version of this file.
function parseStationAndTime(trip, stationProperty, timeProperty, line, purchaseDate) {
    trip[stationProperty] = JsonLd.newObject("TrainStation");
    var station = line.match(/^(.+?) /);
    if (!station)
        return -1;
    trip[stationProperty].name = station[1];

    var idx = station.index + station[0].length;
    var dt = line.slice(idx).match(/([0-9]{2})\.([0-9]{2})\. ([0-9]{2}:[0-9]{2})/);
    if (dt) {
        idx += dt.index + dt[0].length;
        // Without a purchase date the year cannot be derived, so the time is
        // left unset instead of failing in detectYear().
        if (purchaseDate) {
            trip[timeProperty] = JsonLd.toDateTime(dt[1] + ' ' + dt[2] + ' ' + detectYear(dt, purchaseDate) + ' ' + dt[3], "dd MM yyyy hh:mm", "cs");
        }
    }
    return idx;
}

// Parses a departure line: station, date/time, train number and the optional
// coach/seat columns.
function parseDeparture(res, line, purchaseDate) {
    var idx = parseStationAndTime(res.reservationFor, "departureStation", "departureTime", line, purchaseDate);
    if (idx < 0)
        return;

    var trainId = line.slice(idx).match(/([a-zA-Z]+ [0-9a-zA-Z]+)/);
    if (trainId) {
        idx += trainId.index + trainId[0].length;
        res.reservationFor.trainNumber = trainId[1];
    }
    parseSeat(res, line.slice(idx));
}

// Parses an arrival line: station and date/time only.
function parseArrival(res, line, purchaseDate) {
    parseStationAndTime(res.reservationFor, "arrivalStation", "arrivalTime", line, purchaseDate);
}

// Returns the index of the first line at or after `start` that neither starts
// with a space nor with "Ref:"; returns lines.length (or `start`, if that is
// already past the end) when there is no such line.
function findNextStationLineIndex(lines, start) {
    var pos = start;
    while (pos < lines.length && (lines[pos].startsWith(" ") || lines[pos].startsWith("Ref:"))) {
        pos += 1;
    }
    return pos;
}

// Parses consecutive departure/arrival line pairs from `text`, which is
// expected to start right behind the timetable column header (its first line
// is skipped). Returns the array of TrainReservation objects found before the
// footer, the next itinerary header or the end of the text.
function parseLegs(text, purchaseDate) {
    var reservations = [];
    var lines = text.split('\n');
    var depIdx = 1;
    while (depIdx < lines.length) {
        // stop when reaching the footer or the next itinerary header
        if (isHeaderOrFooter(lines[depIdx]))
            return reservations;

        // A departure without a matching arrival line (truncated text, or the
        // footer directly following) is not a complete leg: stop here rather
        // than reading past the end of `lines`.
        var arrIdx = findNextStationLineIndex(lines, depIdx + 1);
        if (arrIdx >= lines.length || isHeaderOrFooter(lines[arrIdx]))
            break;

        var res = JsonLd.newObject("TrainReservation");
        res.reservationFor = JsonLd.newObject("TrainTrip");
        parseDeparture(res, lines[depIdx], purchaseDate);
        parseArrival(res, lines[arrIdx], purchaseDate);
        reservations.push(res);

        // Find the next leg
        depIdx = findNextStationLineIndex(lines, arrIdx + 1);
    }

    return reservations;
}

// Extractor entry point: collects the legs of every timetable section in
// `text` and returns them as an array of TrainReservation objects.
function main(text) {
    var reservations = [];
    var pos = 0;
    // "[dD]" instead of "[d|D]": inside a character class "|" is a literal.
    var purchaseDate = text.match(/([dD]atum platby|UZP): ([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{4})/);

    while (true) {
        // find itinerary headers
        var header = text.slice(pos).match(/Timetable( and Reservations)?/);
        if (!header)
            break;
        var idx = header.index + header[0].length;

        // A timetable title without the seat column header cannot be parsed.
        var timetableHeader = text.slice(pos + idx).match(/(Místo \/ Seat \/ Sitzplatz)/);
        if (!timetableHeader)
            break;
        idx += timetableHeader.index + timetableHeader[0].length;

        reservations = reservations.concat(parseLegs(text.slice(pos + idx), purchaseDate));
        // idx is always positive here, so the search position strictly advances
        pos += idx + 1;
    }

    // NOTE(review): this only matches the label immediately followed by a
    // colon; a "Kód transakce / Transaction code:" layout with the code on the
    // next line is not matched - confirm whether that is intended.
    var bookingRef = text.match(/Kód transakce:\s*([A-Z0-9]{6})\n/);
    for (var i = 0; bookingRef && i < reservations.length; ++i)
        reservations[i].reservationNumber = bookingRef[1];
    return reservations;
}