diff --git a/autotests/CMakeLists.txt b/autotests/CMakeLists.txt index b783fdf..913afff 100644 --- a/autotests/CMakeLists.txt +++ b/autotests/CMakeLists.txt @@ -1,22 +1,21 @@ add_definitions(-DSOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}") ecm_add_test(stringutiltest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(datatypestest.cpp LINK_LIBRARIES Qt5::Test Qt5::Qml KPim::Itinerary) ecm_add_test(jsonlddocumenttest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(mergeutiltest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(knowledgedbtest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(airportdbtest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(bcbpparsertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(uic9183parsertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(jsapitest.cpp ../src/jsapi/jsonld.cpp TEST_NAME jsapitest LINK_LIBRARIES Qt5::Test KPim::Itinerary Qt5::Qml) ecm_add_test(structureddataextractortest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(pdfdocumenttest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary Qt5::Gui) ecm_add_test(htmldocumenttest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(barcodedecodertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary Qt5::Gui) -ecm_add_test(unstructureddataextractortest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(pkpassextractortest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary KPim::PkPass) ecm_add_test(postprocessortest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) if (TARGET KF5::CalendarCore) ecm_add_test(calendarhandlertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary) ecm_add_test(extractortest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary KPim::PkPass) endif() diff --git a/autotests/unstructureddata/amadeus_1.txt b/autotests/extractordata/amadeus/amadeus_1.txt similarity index 100% rename from autotests/unstructureddata/amadeus_1.txt rename to autotests/extractordata/amadeus/amadeus_1.txt diff --git a/autotests/extractordata/amadeus/amadeus_1.txt.json b/autotests/extractordata/amadeus/amadeus_1.txt.json new file mode 100644 index 0000000..c6c8629 --- /dev/null +++ b/autotests/extractordata/amadeus/amadeus_1.txt.json @@ -0,0 +1,214 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "DL", + "name": "DELTA AIR LINES" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "NL" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.308101654052734, + "longitude": 4.760280132293701 + }, + "iataCode": "AMS", + "name": "AMSTERDAM, NL (SCHIPHOL AIRPORT)" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-06-07T13:30:00+02:00", + "timezone": "Europe/Amsterdam" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "BERLIN, DE (TEGEL)" + }, + "departureDay": "2016-06-07", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-06-07T12:10:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "9520" + }, + "reservationNumber": "123456" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "DL", + "name": "DELTA AIR LINES" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 42.212501525878906, + "longitude": -83.35330200195312 + }, + "iataCode": "DTW", + "name": "DETROIT, MI (METROPOLITAN WAYNE CO), TERMINAL EM" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-06-07T17:40:00-04:00", + "timezone": "America/Detroit" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "NL" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.308101654052734, + "longitude": 4.760280132293701 + }, + "iataCode": "AMS", + "name": "AMSTERDAM, NL (SCHIPHOL AIRPORT)" + }, + "departureDay": "2016-06-07", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-06-07T15:00:00+02:00", + "timezone": "Europe/Amsterdam" + }, + "flightNumber": "139" + }, + "reservationNumber": "123456" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "DL", + "name": "DELTA AIR LINES" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "FR" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 49.009700775146484, + "longitude": 2.5477800369262695 + }, + "iataCode": "CDG", + "name": "PARIS, FR (CHARLES DE GAULLE), TERMINAL 2E" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-06-10T11:30:00+02:00", + "timezone": "Europe/Paris" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 42.212501525878906, + "longitude": -83.35330200195312 + }, + "iataCode": "DTW", + "name": "DETROIT, MI (METROPOLITAN WAYNE CO), TERMINAL EM" + }, + "departureDay": "2016-06-09", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-06-09T21:40:00-04:00", + "timezone": "America/Detroit" + }, + "flightNumber": "8573" + }, + "reservationNumber": "123456" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "DL", + "name": "DELTA AIR LINES" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "BERLIN, DE (TEGEL)" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-06-10T14:40:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "FR" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 49.009700775146484, + "longitude": 2.5477800369262695 + }, + "iataCode": "CDG", + "name": "PARIS, FR (CHARLES DE GAULLE), TERMINAL 2F" + }, + "departureDay": "2016-06-10", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-06-10T13:00:00+02:00", + "timezone": "Europe/Paris" + }, + "flightNumber": "8680" + }, + "reservationNumber": "123456" + } +] diff --git a/autotests/extractordata/amadeus/context.eml b/autotests/extractordata/amadeus/context.eml new file mode 100644 index 0000000..ccefef5 --- /dev/null +++ b/autotests/extractordata/amadeus/context.eml @@ -0,0 +1 @@ +From: "Travel Document" diff --git a/autotests/unstructureddata/aohostels_1.txt b/autotests/extractordata/aohostels/aohostels_1.txt similarity index 100% rename from autotests/unstructureddata/aohostels_1.txt rename to autotests/extractordata/aohostels/aohostels_1.txt diff --git a/autotests/unstructureddata/aohostels_1.json b/autotests/extractordata/aohostels/aohostels_1.txt.json similarity index 69% rename from autotests/unstructureddata/aohostels_1.json rename to autotests/extractordata/aohostels/aohostels_1.txt.json index 998bc7c..c1d4fd1 100644 --- a/autotests/unstructureddata/aohostels_1.json +++ b/autotests/extractordata/aohostels/aohostels_1.txt.json @@ -1,31 +1,39 @@ [ { "@context": "http://schema.org", "@type": "LodgingReservation", - "checkinTime": "2018-08-10T15:00:00", - "checkoutTime": "2018-08-18T10:00:00", + "checkinTime": { + "@type": "QDateTime", + "@value": "2018-08-10T15:00:00+02:00", + "timezone": "Europe/Vienna" + }, + "checkoutTime": { + "@type": "QDateTime", + "@value": "2018-08-18T10:00:00+02:00", + "timezone": "Europe/Vienna" + }, "reservationFor": { "@type": "LodgingBusiness", "address": { "@type": "PostalAddress", - "addressCountry": "Österreich", + "addressCountry": "AT", "addressLocality": "Wien", "postalCode": "1100", "streetAddress": "Sonnwendgasse 11" }, + "email": "AO-XX-XX@aohostels.com", "geo": { "@type": "GeoCoordinates", "latitude": 48.1828498840332, "longitude": 16.37863540649414 }, - "email": "AO-XX-XX@aohostels.com", "name": "a&o Wien Hauptbahnhof", "telephone": "+43 1 602 1234 5678" }, "reservationNumber": "AOI-XX-1234567", "underName": { "@type": "Person", "name": "John Doe" } } ] diff --git a/autotests/extractordata/aohostels/context.eml b/autotests/extractordata/aohostels/context.eml new file mode 100644 index 0000000..3de613f --- /dev/null +++ b/autotests/extractordata/aohostels/context.eml @@ -0,0 +1 @@ +From: A&O Service Center diff --git a/autotests/unstructureddata/brusselsairlines_1.html b/autotests/extractordata/brusselsairlines/brusselsairlines_1.html similarity index 100% rename from autotests/unstructureddata/brusselsairlines_1.html rename to autotests/extractordata/brusselsairlines/brusselsairlines_1.html diff --git a/autotests/extractordata/brusselsairlines/brusselsairlines_1.html.json b/autotests/extractordata/brusselsairlines/brusselsairlines_1.html.json new file mode 100644 index 0000000..a3068b4 --- /dev/null +++ b/autotests/extractordata/brusselsairlines/brusselsairlines_1.html.json @@ -0,0 +1,108 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "SN", + "name": "Brussels Airlines" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "BE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 50.90140151977539, + "longitude": 4.484439849853516 + }, + "iataCode": "BRU", + "name": "Brussels Airport, BE" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-02-03T19:45:00+01:00", + "timezone": "Europe/Brussels" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin, Tegel Airport, DE" + }, + "departureDay": "2017-02-03", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-02-03T18:25:00+01:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "2588" + }, + "reservationNumber": "XXX007" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "SN", + "name": "Brussels Airlines" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin, Tegel Airport, DE" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-02-05T22:00:00+01:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "BE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 50.90140151977539, + "longitude": 4.484439849853516 + }, + "iataCode": "BRU", + "name": "Brussels Airport, BE" + }, + "departureDay": "2017-02-05", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-02-05T20:40:00+01:00", + "timezone": "Europe/Brussels" + }, + "flightNumber": "2591" + }, + "reservationNumber": "XXX007" + } +] diff --git a/autotests/extractordata/brusselsairlines/context.eml b/autotests/extractordata/brusselsairlines/context.eml new file mode 100644 index 0000000..0fa124d --- /dev/null +++ b/autotests/extractordata/brusselsairlines/context.eml @@ -0,0 +1 @@ +From: Brussels Airlines diff --git a/autotests/extractordata/czechrailways/context.eml b/autotests/extractordata/czechrailways/context.eml new file mode 100644 index 0000000..54d3a57 --- /dev/null +++ b/autotests/extractordata/czechrailways/context.eml @@ -0,0 +1 @@ +From: info@cd.cz diff --git a/autotests/unstructureddata/czechrailways_one-leg-no-seat-single.txt b/autotests/extractordata/czechrailways/czechrailways_one-leg-no-seat-single.txt similarity index 100% rename from autotests/unstructureddata/czechrailways_one-leg-no-seat-single.txt rename to autotests/extractordata/czechrailways/czechrailways_one-leg-no-seat-single.txt diff --git a/autotests/unstructureddata/czechrailways_one-leg-no-seat-single.json b/autotests/extractordata/czechrailways/czechrailways_one-leg-no-seat-single.txt.json similarity index 100% rename from autotests/unstructureddata/czechrailways_one-leg-no-seat-single.json rename to autotests/extractordata/czechrailways/czechrailways_one-leg-no-seat-single.txt.json diff --git a/autotests/unstructureddata/czechrailways_one-leg-return.txt b/autotests/extractordata/czechrailways/czechrailways_one-leg-return.txt similarity index 100% rename from autotests/unstructureddata/czechrailways_one-leg-return.txt rename to autotests/extractordata/czechrailways/czechrailways_one-leg-return.txt diff --git a/autotests/unstructureddata/czechrailways_one-leg-return.json b/autotests/extractordata/czechrailways/czechrailways_one-leg-return.txt.json similarity index 94% rename from autotests/unstructureddata/czechrailways_one-leg-return.json rename to autotests/extractordata/czechrailways/czechrailways_one-leg-return.txt.json index c4d7bc9..d0fe06b 100644 --- a/autotests/unstructureddata/czechrailways_one-leg-return.json +++ b/autotests/extractordata/czechrailways/czechrailways_one-leg-return.txt.json @@ -1,54 +1,54 @@ [ { - "@context": "http://schema.org", - "@type": "TrainReservation", + "@context": "http://schema.org", + "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalStation": { "@type": "TrainStation", "name": "Brno hl.n." }, "arrivalTime": "2017-12-31T16:20:00", "departureStation": { "@type": "TrainStation", "name": "Praha hl.n." }, "departureTime": "2017-12-31T13:51:00", "trainNumber": "EC 173" }, "reservedTicket": { "@type": "Ticket", "ticketedSeat": { "@type": "Seat", "seatNumber": "71", "seatSection": "262" } } }, { "@context": "http://schema.org", "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalStation": { "@type": "TrainStation", "name": "Praha hl.n." }, "arrivalTime": "2018-01-01T13:07:00", "departureStation": { "@type": "TrainStation", "name": "Brno hl.n." }, "departureTime": "2018-01-01T10:38:00", "trainNumber": "rj 72" }, "reservedTicket": { "@type": "Ticket", "ticketedSeat": { "@type": "Seat", "seatNumber": "51", "seatSection": "27" } } } ] diff --git a/autotests/unstructureddata/czechrailways_one-leg-single.txt b/autotests/extractordata/czechrailways/czechrailways_one-leg-single.txt similarity index 100% rename from autotests/unstructureddata/czechrailways_one-leg-single.txt rename to autotests/extractordata/czechrailways/czechrailways_one-leg-single.txt diff --git a/autotests/unstructureddata/czechrailways_one-leg-single.json b/autotests/extractordata/czechrailways/czechrailways_one-leg-single.txt.json similarity index 100% rename from autotests/unstructureddata/czechrailways_one-leg-single.json rename to autotests/extractordata/czechrailways/czechrailways_one-leg-single.txt.json diff --git a/autotests/unstructureddata/czechrailways_two-leg-single.txt b/autotests/extractordata/czechrailways/czechrailways_two-leg-single.txt similarity index 100% rename from autotests/unstructureddata/czechrailways_two-leg-single.txt rename to autotests/extractordata/czechrailways/czechrailways_two-leg-single.txt diff --git a/autotests/unstructureddata/czechrailways_two-leg-single.json b/autotests/extractordata/czechrailways/czechrailways_two-leg-single.txt.json similarity index 100% rename from autotests/unstructureddata/czechrailways_two-leg-single.json rename to autotests/extractordata/czechrailways/czechrailways_two-leg-single.txt.json diff --git a/autotests/extractordata/deutschebahn/context.eml b/autotests/extractordata/deutschebahn/context.eml new file mode 100644 index 0000000..1471641 --- /dev/null +++ b/autotests/extractordata/deutschebahn/context.eml @@ -0,0 +1 @@ +From: buchungsbestaetigung@bahn.de diff --git a/autotests/unstructureddata/deutschebahn_one-leg-return-international.txt b/autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-international.txt similarity index 100% rename from autotests/unstructureddata/deutschebahn_one-leg-return-international.txt rename to autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-international.txt diff --git a/autotests/unstructureddata/deutschebahn_one-leg-return-international.json b/autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-international.txt.json similarity index 100% rename from autotests/unstructureddata/deutschebahn_one-leg-return-international.json rename to autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-international.txt.json diff --git a/autotests/unstructureddata/deutschebahn_one-leg-return-no-seat.txt b/autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-no-seat.txt similarity index 100% rename from autotests/unstructureddata/deutschebahn_one-leg-return-no-seat.txt rename to autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-no-seat.txt diff --git a/autotests/unstructureddata/deutschebahn_one-leg-return-no-seat.json b/autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-no-seat.txt.json similarity index 100% rename from autotests/unstructureddata/deutschebahn_one-leg-return-no-seat.json rename to autotests/extractordata/deutschebahn/deutschebahn_one-leg-return-no-seat.txt.json diff --git a/autotests/unstructureddata/deutschebahn_one-leg-single-international.txt b/autotests/extractordata/deutschebahn/deutschebahn_one-leg-single-international.txt similarity index 100% rename from autotests/unstructureddata/deutschebahn_one-leg-single-international.txt rename to autotests/extractordata/deutschebahn/deutschebahn_one-leg-single-international.txt diff --git a/autotests/unstructureddata/deutschebahn_one-leg-single-international.json b/autotests/extractordata/deutschebahn/deutschebahn_one-leg-single-international.txt.json similarity index 100% rename from autotests/unstructureddata/deutschebahn_one-leg-single-international.json rename to autotests/extractordata/deutschebahn/deutschebahn_one-leg-single-international.txt.json diff --git a/autotests/unstructureddata/deutschebahn_two-leg-return.txt b/autotests/extractordata/deutschebahn/deutschebahn_two-leg-return.txt similarity index 100% rename from autotests/unstructureddata/deutschebahn_two-leg-return.txt rename to autotests/extractordata/deutschebahn/deutschebahn_two-leg-return.txt diff --git a/autotests/unstructureddata/deutschebahn_two-leg-return.json b/autotests/extractordata/deutschebahn/deutschebahn_two-leg-return.txt.json similarity index 97% rename from autotests/unstructureddata/deutschebahn_two-leg-return.json rename to autotests/extractordata/deutschebahn/deutschebahn_two-leg-return.txt.json index 3cb177d..7540de9 100644 --- a/autotests/unstructureddata/deutschebahn_two-leg-return.json +++ b/autotests/extractordata/deutschebahn/deutschebahn_two-leg-return.txt.json @@ -1,102 +1,102 @@ [ { - "@context": "http://schema.org", - "@type": "TrainReservation", + "@context": "http://schema.org", + "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalPlatform": "6 D-G", "arrivalStation": { "@type": "TrainStation", "name": "Köln Hbf" }, "arrivalTime": "2027-11-03T12:09:00", "departurePlatform": "4A-D", "departureStation": { "@type": "TrainStation", "name": "Berlin Hbf (tief)" }, "departureTime": "2027-11-03T07:49:00", "trainNumber": "ICE 954" }, "reservationNumber": "XXX007", "reservedTicket": { "@type": "Ticket", "ticketedSeat": { "@type": "Seat", "seatNumber": "61", "seatSection": "34" } } }, { "@context": "http://schema.org", "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalPlatform": "3", "arrivalStation": { "@type": "TrainStation", "name": "Somewhere(Specific)" }, "arrivalTime": "2027-11-03T12:45:00", "departurePlatform": "10 A-B", "departureStation": { "@type": "TrainStation", "name": "Köln Hbf" }, "departureTime": "2027-11-03T12:17:00", "trainNumber": "S0" }, "reservationNumber": "XXX007" }, { "@context": "http://schema.org", "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalPlatform": "14", "arrivalStation": { "@type": "TrainStation", "name": "Düsseldorf Hbf" }, "arrivalTime": "2027-11-04T14:28:00", "departurePlatform": "3", "departureStation": { "@type": "TrainStation", "name": "Somewhere(Specific)" }, "departureTime": "2027-11-04T14:06:00", "trainNumber": "S0" }, "reservationNumber": "XXX007" }, { "@context": "http://schema.org", "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalPlatform": "", "arrivalStation": { "@type": "TrainStation", "name": "Berlin Hbf (tief)" }, "arrivalTime": "2027-11-04T19:27:00", "departurePlatform": "19", "departureStation": { "@type": "TrainStation", "name": "Düsseldorf Hbf" }, "departureTime": "2027-11-04T14:52:00", "trainNumber": "ICE 641" }, "reservationNumber": "XXX007", "reservedTicket": { "@type": "Ticket", "ticketedSeat": { "@type": "Seat", "seatNumber": "85", "seatSection": "22" } } } ] diff --git a/autotests/unstructureddata/deutschebahn_two-leg-single.txt b/autotests/extractordata/deutschebahn/deutschebahn_two-leg-single.txt similarity index 100% rename from autotests/unstructureddata/deutschebahn_two-leg-single.txt rename to autotests/extractordata/deutschebahn/deutschebahn_two-leg-single.txt diff --git a/autotests/unstructureddata/deutschebahn_two-leg-single.json b/autotests/extractordata/deutschebahn/deutschebahn_two-leg-single.txt.json similarity index 100% rename from autotests/unstructureddata/deutschebahn_two-leg-single.json rename to autotests/extractordata/deutschebahn/deutschebahn_two-leg-single.txt.json diff --git a/autotests/extractordata/eurowings/context.eml b/autotests/extractordata/eurowings/context.eml new file mode 100644 index 0000000..029e898 --- /dev/null +++ b/autotests/extractordata/eurowings/context.eml @@ -0,0 +1 @@ +From: Booking diff --git a/autotests/unstructureddata/eurowings_1.txt b/autotests/extractordata/eurowings/eurowings_1.txt similarity index 100% rename from autotests/unstructureddata/eurowings_1.txt rename to autotests/extractordata/eurowings/eurowings_1.txt diff --git a/autotests/extractordata/eurowings/eurowings_1.txt.json b/autotests/extractordata/eurowings/eurowings_1.txt.json new file mode 100644 index 0000000..68169fb --- /dev/null +++ b/autotests/extractordata/eurowings/eurowings_1.txt.json @@ -0,0 +1,108 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "4U", + "name": "Germanwings" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "GB" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.477500915527344, + "longitude": -0.4613890051841736 + }, + "iataCode": "LHR", + "name": "London Heathrow" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-06-15T13:50:00+01:00", + "timezone": "Europe/London" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin-Tegel" + }, + "departureDay": "2017-06-15", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-06-15T12:55:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "8462" + }, + "reservationNumber": "ABC123" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "4U", + "name": "Germanwings" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin-Tegel" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-06-18T22:00:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "GB" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.477500915527344, + "longitude": -0.4613890051841736 + }, + "iataCode": "LHR", + "name": "London Heathrow" + }, + "departureDay": "2017-06-18", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-06-18T19:10:00+01:00", + "timezone": "Europe/London" + }, + "flightNumber": "8465" + }, + "reservationNumber": "ABC123" + } +] diff --git a/autotests/extractordata/fcm/context.eml b/autotests/extractordata/fcm/context.eml new file mode 100644 index 0000000..c222c43 --- /dev/null +++ b/autotests/extractordata/fcm/context.eml @@ -0,0 +1 @@ +From: Travellink Corporate diff --git a/autotests/unstructureddata/fcmtravel_1.txt b/autotests/extractordata/fcm/fcmtravel_1.txt similarity index 100% rename from autotests/unstructureddata/fcmtravel_1.txt rename to autotests/extractordata/fcm/fcmtravel_1.txt diff --git a/autotests/extractordata/fcm/fcmtravel_1.txt.json b/autotests/extractordata/fcm/fcmtravel_1.txt.json new file mode 100644 index 0000000..c332394 --- /dev/null +++ b/autotests/extractordata/fcm/fcmtravel_1.txt.json @@ -0,0 +1,116 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "BT", + "name": "Air Baltic" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "LV" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 56.92359924316406, + "longitude": 23.971099853515625 + }, + "iataCode": "RIX", + "name": "Riga" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-11-05T11:35:00+02:00", + "timezone": "Europe/Riga" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Tegel" + }, + "departureDay": "2017-11-05", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-11-05T08:55:00+01:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "212" + }, + "reservationNumber": "ABCDEF", + "underName": { + "@type": "Person", + "name": "John Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "BT", + "name": "Air Baltic" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Tegel" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-11-10T08:20:00+01:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "LV" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 56.92359924316406, + "longitude": 23.971099853515625 + }, + "iataCode": "RIX", + "name": "Riga" + }, + "departureDay": "2017-11-10", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-11-10T07:35:00+02:00", + "timezone": "Europe/Riga" + }, + "flightNumber": "211" + }, + "reservationNumber": "ABCDEF", + "underName": { + "@type": "Person", + "name": "John Doe" + } + } +] diff --git a/autotests/unstructureddata/fcmtravel_2.txt b/autotests/extractordata/fcm/fcmtravel_2.txt similarity index 100% rename from autotests/unstructureddata/fcmtravel_2.txt rename to autotests/extractordata/fcm/fcmtravel_2.txt diff --git a/autotests/extractordata/fcm/fcmtravel_2.txt.json b/autotests/extractordata/fcm/fcmtravel_2.txt.json new file mode 100644 index 0000000..2fc1e8f --- /dev/null +++ b/autotests/extractordata/fcm/fcmtravel_2.txt.json @@ -0,0 +1,458 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AB", + "name": "Air Berlin" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.289398193359375, + "longitude": 6.766670227050781 + }, + "iataCode": "DUS", + "name": "International Airport" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-17T12:00:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Tegel" + }, + "departureDay": "2016-10-17", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-17T10:50:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "6439" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "Jane Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AB", + "name": "Air Berlin" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.289398193359375, + "longitude": 6.766670227050781 + }, + "iataCode": "DUS", + "name": "International Airport" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-17T12:00:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Tegel" + }, + "departureDay": "2016-10-17", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-17T10:50:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "6439" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "John Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AB", + "name": "Air Berlin" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 37.618900299072266, + "longitude": -122.375 + }, + "iataCode": "SFO", + "name": "San Francisco International" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-17T15:55:00-07:00", + "timezone": "America/Los_Angeles" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.289398193359375, + "longitude": 6.766670227050781 + }, + "iataCode": "DUS", + "name": "International Airport" + }, + "departureDay": "2016-10-17", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-17T13:20:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "7392" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "Jane Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AB", + "name": "Air Berlin" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 37.618900299072266, + "longitude": -122.375 + }, + "iataCode": "SFO", + "name": "San Francisco International" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-17T15:55:00-07:00", + "timezone": "America/Los_Angeles" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.289398193359375, + "longitude": 6.766670227050781 + }, + "iataCode": "DUS", + "name": "International Airport" + }, + "departureDay": "2016-10-17", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-17T13:20:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "7392" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "John Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AA", + "name": "American Airlines" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 41.978599548339844, + "longitude": -87.90470123291016 + }, + "iataCode": "ORD", + "name": "O Hare International" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-22T14:00:00-05:00", + "timezone": "America/Chicago" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 45.588600158691406, + "longitude": -122.5979995727539 + }, + "iataCode": "PDX", + "name": "Portland International" + }, + "departureDay": "2016-10-22", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-22T08:00:00-07:00", + "timezone": "America/Los_Angeles" + }, + "flightNumber": "086" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "Jane Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AA", + "name": "American Airlines" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 41.978599548339844, + "longitude": -87.90470123291016 + }, + "iataCode": "ORD", + "name": "O Hare International" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-22T14:00:00-05:00", + "timezone": "America/Chicago" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 45.588600158691406, + "longitude": -122.5979995727539 + }, + "iataCode": "PDX", + "name": "Portland International" + }, + "departureDay": "2016-10-22", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-22T08:00:00-07:00", + "timezone": "America/Los_Angeles" + }, + "flightNumber": "086" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "John Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AB", + "name": "Air Berlin" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Tegel" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-23T07:00:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 41.978599548339844, + "longitude": -87.90470123291016 + }, + "iataCode": "ORD", + "name": "O Hare International" + }, + "departureDay": "2016-10-22", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-22T15:25:00-05:00", + "timezone": "America/Chicago" + }, + "flightNumber": "7421" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "Jane Doe" + } + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "AB", + "name": "Air Berlin" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Tegel" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2016-10-23T07:00:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 41.978599548339844, + "longitude": -87.90470123291016 + }, + "iataCode": "ORD", + "name": "O Hare International" + }, + "departureDay": "2016-10-22", + "departureTime": { + "@type": "QDateTime", + "@value": "2016-10-22T15:25:00-05:00", + "timezone": "America/Chicago" + }, + "flightNumber": "7421" + }, + "reservationNumber": "XXX007", + "underName": { + "@type": "Person", + "name": "John Doe" + } + } +] diff --git a/autotests/extractordata/iberia/context.eml b/autotests/extractordata/iberia/context.eml new file mode 100644 index 0000000..d6721a9 --- /dev/null +++ b/autotests/extractordata/iberia/context.eml @@ -0,0 +1,2 @@ +From: IBERIA L A E +Date: Fri, 29 Dec 2017 18:46:02 +0100 diff --git a/autotests/unstructureddata/iberia_1.txt b/autotests/extractordata/iberia/iberia_1.txt similarity index 100% rename from autotests/unstructureddata/iberia_1.txt rename to autotests/extractordata/iberia/iberia_1.txt diff --git a/autotests/extractordata/iberia/iberia_1.txt.json b/autotests/extractordata/iberia/iberia_1.txt.json new file mode 100644 index 0000000..e1c7530 --- /dev/null +++ b/autotests/extractordata/iberia/iberia_1.txt.json @@ -0,0 +1,213 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "IB", + "name": "Iberia Express" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "ES" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 40.472198486328125, + "longitude": -3.5608301162719727 + }, + "iataCode": "MAD", + "name": "Madrid" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2018-07-20T15:35:00+02:00", + "timezone": "Europe/Madrid" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin" + }, + "departureDay": "2018-07-20", + "departureTime": { + "@type": "QDateTime", + "@value": "2018-07-20T12:25:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "3677" + }, + "reservationNumber": "XXX007" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "IB", + "name": "Iberia Regional Air Nostrum" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "ES" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 36.84389877319336, + "longitude": -2.369999885559082 + }, + "iataCode": "LEI", + "name": "Almeria" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2018-07-20T19:00:00+02:00", + "timezone": "Europe/Madrid" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "ES" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 40.472198486328125, + "longitude": -3.5608301162719727 + }, + "iataCode": "MAD", + "name": "Madrid" + }, + "departureDay": "2018-07-20", + "departureTime": { + "@type": "QDateTime", + "@value": "2018-07-20T17:50:00+02:00", + "timezone": "Europe/Madrid" + }, + "flightNumber": "8588" + }, + "reservationNumber": "XXX007" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "IB", + "name": "Iberia Regional Air Nostrum" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "ES" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 40.472198486328125, + "longitude": -3.5608301162719727 + }, + "iataCode": "MAD", + "name": "Madrid" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2018-07-28T08:25:00+02:00", + "timezone": "Europe/Madrid" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "ES" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 36.84389877319336, + "longitude": -2.369999885559082 + }, + "iataCode": "LEI", + "name": "Almeria" + }, + "departureDay": "2018-07-28", + "departureTime": { + "@type": "QDateTime", + "@value": "2018-07-28T07:15:00+02:00", + "timezone": "Europe/Madrid" + }, + "flightNumber": "8603" + }, + "reservationNumber": "XXX007" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "IB" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2018-07-28T15:00:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "ES" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 40.472198486328125, + "longitude": -3.5608301162719727 + }, + "iataCode": "MAD", + "name": "Madrid" + }, + "departureDay": "2018-07-28", + "departureTime": { + "@type": "QDateTime", + "@value": "2018-07-28T12:00:00+02:00", + "timezone": "Europe/Madrid" + }, + "flightNumber": "3186" + }, + "reservationNumber": "XXX007" + } +] diff --git a/autotests/extractordata/regiojet/context.eml b/autotests/extractordata/regiojet/context.eml new file mode 100644 index 0000000..bbf0bf3 --- /dev/null +++ b/autotests/extractordata/regiojet/context.eml @@ -0,0 +1 @@ +From: info@regiojet.cz diff --git a/autotests/unstructureddata/regiojet_bus_cs-one-leg-return.txt b/autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-return.txt similarity index 100% rename from autotests/unstructureddata/regiojet_bus_cs-one-leg-return.txt rename to autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-return.txt diff --git a/autotests/unstructureddata/regiojet_bus_cs-one-leg-return.json b/autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-return.txt.json similarity index 100% rename from autotests/unstructureddata/regiojet_bus_cs-one-leg-return.json rename to autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-return.txt.json diff --git a/autotests/unstructureddata/regiojet_bus_cs-one-leg-single.txt b/autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-single.txt similarity index 100% rename from autotests/unstructureddata/regiojet_bus_cs-one-leg-single.txt rename to autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-single.txt diff --git a/autotests/unstructureddata/regiojet_bus_cs-one-leg-single.json b/autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-single.txt.json similarity index 100% rename from autotests/unstructureddata/regiojet_bus_cs-one-leg-single.json rename to autotests/extractordata/regiojet/regiojet_bus_cs-one-leg-single.txt.json diff --git a/autotests/unstructureddata/regiojet_bus_en-one-leg-single.txt b/autotests/extractordata/regiojet/regiojet_bus_en-one-leg-single.txt similarity index 100% rename from autotests/unstructureddata/regiojet_bus_en-one-leg-single.txt rename to autotests/extractordata/regiojet/regiojet_bus_en-one-leg-single.txt diff --git a/autotests/unstructureddata/regiojet_bus_en-one-leg-single.json b/autotests/extractordata/regiojet/regiojet_bus_en-one-leg-single.txt.json similarity index 100% rename from autotests/unstructureddata/regiojet_bus_en-one-leg-single.json rename to autotests/extractordata/regiojet/regiojet_bus_en-one-leg-single.txt.json diff --git a/autotests/unstructureddata/regiojet_train_cs-one-leg-single.txt b/autotests/extractordata/regiojet/regiojet_train_cs-one-leg-single.txt similarity index 100% rename from autotests/unstructureddata/regiojet_train_cs-one-leg-single.txt rename to autotests/extractordata/regiojet/regiojet_train_cs-one-leg-single.txt diff --git a/autotests/unstructureddata/regiojet_train_cs-one-leg-single.json b/autotests/extractordata/regiojet/regiojet_train_cs-one-leg-single.txt.json similarity index 100% rename from autotests/unstructureddata/regiojet_train_cs-one-leg-single.json rename to autotests/extractordata/regiojet/regiojet_train_cs-one-leg-single.txt.json diff --git a/autotests/extractordata/sncf/context.eml b/autotests/extractordata/sncf/context.eml new file mode 100644 index 0000000..7ecbb4d --- /dev/null +++ b/autotests/extractordata/sncf/context.eml @@ -0,0 +1,2 @@ +From: e-billet@sncf.fr +Date: Fri, 29 Dec 2017 18:46:02 +0100 diff --git a/autotests/unstructureddata/sncf_one-leg-single-tgv.txt b/autotests/extractordata/sncf/sncf_one-leg-single-tgv.txt similarity index 100% rename from autotests/unstructureddata/sncf_one-leg-single-tgv.txt rename to autotests/extractordata/sncf/sncf_one-leg-single-tgv.txt diff --git a/autotests/unstructureddata/sncf_one-leg-single-tgv.json b/autotests/extractordata/sncf/sncf_one-leg-single-tgv.txt.json similarity index 99% rename from autotests/unstructureddata/sncf_one-leg-single-tgv.json rename to autotests/extractordata/sncf/sncf_one-leg-single-tgv.txt.json index acd283b..3173e69 100644 --- a/autotests/unstructureddata/sncf_one-leg-single-tgv.json +++ b/autotests/extractordata/sncf/sncf_one-leg-single-tgv.txt.json @@ -1,29 +1,29 @@ [ - { + { "@context": "http://schema.org", "@type": "TrainReservation", "reservationFor": { "@type": "TrainTrip", "arrivalStation": { "@type": "TrainStation", "name": "MONTPELLIER ST-RO" }, "arrivalTime": "2018-07-15T19:58:00", "departureStation": { "@type": "TrainStation", "name": "TOULOUSE MATABIAU" }, "departureTime": "2018-07-15T17:50:00", "trainNumber": "6857" }, "reservationNumber": "XXX007", "reservedTicket": { "@type": "Ticket", "ticketedSeat": { "@type": "Seat", "seatNumber": "31", "seatSection": "13" } } } ] diff --git a/autotests/extractordata/swiss/context.eml b/autotests/extractordata/swiss/context.eml new file mode 100644 index 0000000..1352d08 --- /dev/null +++ b/autotests/extractordata/swiss/context.eml @@ -0,0 +1 @@ +From: SWISS diff --git a/autotests/unstructureddata/swiss_one-leg-return.txt b/autotests/extractordata/swiss/swiss_one-leg-return.txt similarity index 100% rename from autotests/unstructureddata/swiss_one-leg-return.txt rename to autotests/extractordata/swiss/swiss_one-leg-return.txt diff --git a/autotests/extractordata/swiss/swiss_one-leg-return.txt.json b/autotests/extractordata/swiss/swiss_one-leg-return.txt.json new file mode 100644 index 0000000..54ef173 --- /dev/null +++ b/autotests/extractordata/swiss/swiss_one-leg-return.txt.json @@ -0,0 +1,106 @@ +[ + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "LX" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "CH" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 47.464698791503906, + "longitude": 8.549169540405273 + }, + "iataCode": "ZRH", + "name": "Zürich" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-09-10T08:15:00+02:00", + "timezone": "Europe/Zurich" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin" + }, + "departureDay": "2017-09-10", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-09-10T06:45:00+02:00", + "timezone": "Europe/Berlin" + }, + "flightNumber": "963" + }, + "reservationNumber": "XXX007" + }, + { + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "LX" + }, + "arrivalAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "DE" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 52.55970001220703, + "longitude": 13.287799835205078 + }, + "iataCode": "TXL", + "name": "Berlin" + }, + "arrivalTime": { + "@type": "QDateTime", + "@value": "2017-09-15T22:15:00+02:00", + "timezone": "Europe/Berlin" + }, + "departureAirport": { + "@type": "Airport", + "address": { + "@type": "PostalAddress", + "addressCountry": "CH" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": 47.464698791503906, + "longitude": 8.549169540405273 + }, + "iataCode": "ZRH", + "name": "Zürich" + }, + "departureDay": "2017-09-15", + "departureTime": { + "@type": "QDateTime", + "@value": "2017-09-15T20:50:00+02:00", + "timezone": "Europe/Zurich" + }, + "flightNumber": "962" + }, + "reservationNumber": "XXX007" + } +] diff --git a/autotests/extractortest.cpp b/autotests/extractortest.cpp index 3b54e00..94305a9 100644 --- a/autotests/extractortest.cpp +++ b/autotests/extractortest.cpp @@ -1,188 +1,180 @@ /* Copyright (C) 2018 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace KItinerary; /** Note: this test requires external test data that is not publicly available, * ie. real-world unmodified booking documents. * This data cannot be shared for containing privacy-sensitive data and copyrighted * material (e.g. airline logos). */ class ExtractorTest : public QObject { Q_OBJECT private: ExtractorEngine m_engine; private Q_SLOTS: void initTestCase() { // use some exotic locale to ensure the date/time parsing doesn't just work by luck QLocale::setDefault(QLocale(QStringLiteral("fr_FR"))); } void testExtract_data() { QTest::addColumn("contextFile"); QTest::addColumn("inputFile"); - QDir baseDir(QStringLiteral(SOURCE_DIR "/../../kitinerary-tests")); - // test data not available: add dummy entry to not fail the test - if (!baseDir.exists()) { - QTest::newRow("test data not available") << QString() << QString(); - return; - } + for (const QDir &baseDir : {QStringLiteral(SOURCE_DIR "/extractordata"), QStringLiteral(SOURCE_DIR "/../../kitinerary-tests")}) { + if (!baseDir.exists()) { + continue; + } - bool someTestsFound = false; - - QDirIterator dirIt(baseDir.path(), {QStringLiteral("context.eml")}, QDir::Files | QDir::Readable | QDir::NoSymLinks, QDirIterator::Subdirectories); - while (dirIt.hasNext()) { - QFileInfo contextFi(dirIt.next()); - QDirIterator fileIt(contextFi.absolutePath(), {QStringLiteral("*.txt"), QStringLiteral("*.html"), QStringLiteral("*.pdf"), QStringLiteral("*.pkpass"), QStringLiteral("*.ics")}, QDir::Files | QDir::Readable | QDir::NoSymLinks); - while (fileIt.hasNext()) { - fileIt.next(); - someTestsFound = true; - QTest::newRow((contextFi.dir().dirName() + QLatin1Char('-') + fileIt.fileName()).toLatin1().constData()) - << contextFi.absoluteFilePath() - << fileIt.fileInfo().absoluteFilePath(); + QDirIterator dirIt(baseDir.path(), {QStringLiteral("context.eml")}, QDir::Files | QDir::Readable | QDir::NoSymLinks, QDirIterator::Subdirectories); + while (dirIt.hasNext()) { + QFileInfo contextFi(dirIt.next()); + QDirIterator fileIt(contextFi.absolutePath(), {QStringLiteral("*.txt"), QStringLiteral("*.html"), QStringLiteral("*.pdf"), QStringLiteral("*.pkpass"), QStringLiteral("*.ics")}, QDir::Files | QDir::Readable | QDir::NoSymLinks); + while (fileIt.hasNext()) { + fileIt.next(); + QTest::newRow((contextFi.dir().dirName() + QLatin1Char('-') + fileIt.fileName()).toLatin1().constData()) + << contextFi.absoluteFilePath() + << fileIt.fileInfo().absoluteFilePath(); + } } } - if (!someTestsFound) { - QTest::newRow("no tests found in test dir") << QString() << QString(); - return; - } } void testExtract() { QFETCH(QString, contextFile); QFETCH(QString, inputFile); if (contextFile.isEmpty()) { return; } m_engine.clear(); QFile cf(contextFile); QVERIFY(cf.open(QFile::ReadOnly)); KMime::Message contextMsg; contextMsg.setContent(cf.readAll()); contextMsg.parse(); m_engine.setContext(&contextMsg); QFile inFile(inputFile); QVERIFY(inFile.open(QFile::ReadOnly)); std::unique_ptr pass; std::unique_ptr htmlDoc; std::unique_ptr pdfDoc; KCalCore::Calendar::Ptr calendar; QJsonArray jsonResult; if (inputFile.endsWith(QLatin1String(".pkpass"))) { pass.reset(KPkPass::Pass::fromData(inFile.readAll())); m_engine.setPass(pass.get()); } else if (inputFile.endsWith(QLatin1String(".pdf"))) { pdfDoc.reset(PdfDocument::fromData(inFile.readAll())); QVERIFY(pdfDoc); m_engine.setPdfDocument(pdfDoc.get()); } else if (inputFile.endsWith(QLatin1String(".html"))) { htmlDoc.reset(HtmlDocument::fromData(inFile.readAll())); QVERIFY(htmlDoc); m_engine.setHtmlDocument(htmlDoc.get()); } else if (inputFile.endsWith(QLatin1String(".txt"))) { m_engine.setText(QString::fromUtf8(inFile.readAll())); } else if (inputFile.endsWith(QLatin1String(".ics"))) { calendar.reset(new KCalCore::MemoryCalendar(QTimeZone())); KCalCore::ICalFormat format; QVERIFY(format.fromRawString(calendar, inFile.readAll())); m_engine.setCalendar(calendar); } jsonResult = m_engine.extract(); const auto expectedSkip = QFile::exists(inputFile + QLatin1String(".skip")); if (jsonResult.isEmpty() && expectedSkip) { QSKIP("nothing extracted"); return; } QVERIFY(!jsonResult.isEmpty()); const auto result = JsonLdDocument::fromJson(jsonResult); ExtractorPostprocessor postproc; postproc.setContextDate(contextMsg.date()->dateTime()); postproc.process(result); const auto postProcResult = JsonLdDocument::toJson(postproc.result()); if (postProcResult.isEmpty()) { qDebug() << "Result discared in post processing:"; qDebug().noquote() << QJsonDocument(jsonResult).toJson(); } QVERIFY(!postProcResult.isEmpty()); const QString refFile = inputFile + QLatin1String(".json"); if (!QFile::exists(refFile) && !expectedSkip) { QFile f(refFile); QVERIFY(f.open(QFile::WriteOnly)); f.write(QJsonDocument(postProcResult).toJson()); return; } QFile f(refFile); QVERIFY(f.open(QFile::ReadOnly)); const auto refDoc = QJsonDocument::fromJson(f.readAll()); if (refDoc.array() != postProcResult) { QFile failFile(refFile + QLatin1String(".fail")); QVERIFY(failFile.open(QFile::WriteOnly)); failFile.write(QJsonDocument(postProcResult).toJson()); failFile.close(); QProcess proc; proc.setProcessChannelMode(QProcess::ForwardedChannels); proc.start(QStringLiteral("diff"), {QStringLiteral("-u"), refFile, failFile.fileName()}); QVERIFY(proc.waitForFinished()); } QCOMPARE(refDoc.array(), postProcResult); } }; QTEST_MAIN(ExtractorTest) #include "extractortest.moc" diff --git a/autotests/unstructureddata/amadeus_1.json b/autotests/unstructureddata/amadeus_1.json deleted file mode 100644 index 46af7de..0000000 --- a/autotests/unstructureddata/amadeus_1.json +++ /dev/null @@ -1,102 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "DL", - "name": "DELTA AIR LINES" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "AMSTERDAM, NL (SCHIPHOL AIRPORT)" - }, - "arrivalTime": "2016-06-07T13:30:00", - "departureAirport": { - "@type": "Airport", - "name": "BERLIN, DE (TEGEL)" - }, - "departureDay": "2016-06-07", - "departureTime": "2016-06-07T12:10:00", - "flightNumber": "9520" - }, - "reservationNumber": "123456" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "DL", - "name": "DELTA AIR LINES" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "DETROIT, MI (METROPOLITAN WAYNE CO), TERMINAL EM" - }, - "arrivalTime": "2016-06-07T17:40:00", - "departureAirport": { - "@type": "Airport", - "name": "AMSTERDAM, NL (SCHIPHOL AIRPORT)" - }, - "departureDay": "2016-06-07", - "departureTime": "2016-06-07T15:00:00", - "flightNumber": "139" - }, - "reservationNumber": "123456" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "DL", - "name": "DELTA AIR LINES" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "PARIS, FR (CHARLES DE GAULLE), TERMINAL 2E" - }, - "arrivalTime": "2016-06-10T11:30:00", - "departureAirport": { - "@type": "Airport", - "name": "DETROIT, MI (METROPOLITAN WAYNE CO), TERMINAL EM" - }, - "departureDay": "2016-06-09", - "departureTime": "2016-06-09T21:40:00", - "flightNumber": "8573" - }, - "reservationNumber": "123456" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "DL", - "name": "DELTA AIR LINES" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "BERLIN, DE (TEGEL)" - }, - "arrivalTime": "2016-06-10T14:40:00", - "departureAirport": { - "@type": "Airport", - "name": "PARIS, FR (CHARLES DE GAULLE), TERMINAL 2F" - }, - "departureDay": "2016-06-10", - "departureTime": "2016-06-10T13:00:00", - "flightNumber": "8680" - }, - "reservationNumber": "123456" - } -] diff --git a/autotests/unstructureddata/brusselsairlines_1.json b/autotests/unstructureddata/brusselsairlines_1.json deleted file mode 100644 index d874957..0000000 --- a/autotests/unstructureddata/brusselsairlines_1.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "SN", - "name": "Brussels Airlines" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "Brussels Airport, BE", - "iataCode": "BRU" - }, - "arrivalTime": "2017-02-03T19:45:00", - "departureAirport": { - "@type": "Airport", - "name": "Berlin, Tegel Airport, DE" - }, - "departureDay": "2017-02-03", - "departureTime": "2017-02-03T18:25:00", - "flightNumber": "2588" - }, - "reservationNumber": "XXX007" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "SN", - "name": "Brussels Airlines" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "Berlin, Tegel Airport, DE" - }, - "arrivalTime": "2017-02-05T22:00:00", - "departureAirport": { - "@type": "Airport", - "name": "Brussels Airport, BE", - "iataCode": "BRU" - }, - "departureDay": "2017-02-05", - "departureTime": "2017-02-05T20:40:00", - "flightNumber": "2591" - }, - "reservationNumber": "XXX007" - } -] diff --git a/autotests/unstructureddata/eurowings_1.json b/autotests/unstructureddata/eurowings_1.json deleted file mode 100644 index 81c6854..0000000 --- a/autotests/unstructureddata/eurowings_1.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "4U", - "name": "Germanwings" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "London Heathrow" - }, - "arrivalTime": "2017-06-15T13:50:00", - "departureAirport": { - "@type": "Airport", - "name": "Berlin-Tegel" - }, - "departureDay": "2017-06-15", - "departureTime": "2017-06-15T12:55:00", - "flightNumber": "8462" - }, - "reservationNumber": "ABC123" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "4U", - "name": "Germanwings" - }, - "arrivalAirport": { - "@type": "Airport", - "name": "Berlin-Tegel" - }, - "arrivalTime": "2017-06-18T22:00:00", - "departureAirport": { - "@type": "Airport", - "name": "London Heathrow" - }, - "departureDay": "2017-06-18", - "departureTime": "2017-06-18T19:10:00", - "flightNumber": "8465" - }, - "reservationNumber": "ABC123" - } -] diff --git a/autotests/unstructureddata/fcmtravel_1.json b/autotests/unstructureddata/fcmtravel_1.json deleted file mode 100644 index b1ddd00..0000000 --- a/autotests/unstructureddata/fcmtravel_1.json +++ /dev/null @@ -1,64 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "BT", - "name": "Air Baltic" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "RIX", - "name": "Riga" - }, - "arrivalTime": "2017-11-05T11:35:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Tegel" - }, - "departureDay": "2017-11-05", - "departureTime": "2017-11-05T08:55:00", - "flightNumber": "212" - }, - "reservationNumber": "ABCDEF", - "underName": { - "@type": "Person", - "name": "John Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "BT", - "name": "Air Baltic" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Tegel" - }, - "arrivalTime": "2017-11-10T08:20:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "RIX", - "name": "Riga" - }, - "departureDay": "2017-11-10", - "departureTime": "2017-11-10T07:35:00", - "flightNumber": "211" - }, - "reservationNumber": "ABCDEF", - "underName": { - "@type": "Person", - "name": "John Doe" - } - } -] diff --git a/autotests/unstructureddata/fcmtravel_2.json b/autotests/unstructureddata/fcmtravel_2.json deleted file mode 100644 index 5099e1a..0000000 --- a/autotests/unstructureddata/fcmtravel_2.json +++ /dev/null @@ -1,250 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AB", - "name": "Air Berlin" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "DUS", - "name": "International Airport" - }, - "arrivalTime": "2016-10-17T12:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Tegel" - }, - "departureDay": "2016-10-17", - "departureTime": "2016-10-17T10:50:00", - "flightNumber": "6439" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "John Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AB", - "name": "Air Berlin" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "DUS", - "name": "International Airport" - }, - "arrivalTime": "2016-10-17T12:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Tegel" - }, - "departureDay": "2016-10-17", - "departureTime": "2016-10-17T10:50:00", - "flightNumber": "6439" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "Jane Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AB", - "name": "Air Berlin" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "SFO", - "name": "San Francisco International" - }, - "arrivalTime": "2016-10-17T15:55:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "DUS", - "name": "International Airport" - }, - "departureDay": "2016-10-17", - "departureTime": "2016-10-17T13:20:00", - "flightNumber": "7392" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "John Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AB", - "name": "Air Berlin" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "SFO", - "name": "San Francisco International" - }, - "arrivalTime": "2016-10-17T15:55:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "DUS", - "name": "International Airport" - }, - "departureDay": "2016-10-17", - "departureTime": "2016-10-17T13:20:00", - "flightNumber": "7392" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "Jane Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AA", - "name": "American Airlines" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "ORD", - "name": "O Hare International" - }, - "arrivalTime": "2016-10-22T14:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "PDX", - "name": "Portland International" - }, - "departureDay": "2016-10-22", - "departureTime": "2016-10-22T08:00:00", - "flightNumber": "086" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "John Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AA", - "name": "American Airlines" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "ORD", - "name": "O Hare International" - }, - "arrivalTime": "2016-10-22T14:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "PDX", - "name": "Portland International" - }, - "departureDay": "2016-10-22", - "departureTime": "2016-10-22T08:00:00", - "flightNumber": "086" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "Jane Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AB", - "name": "Air Berlin" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Tegel" - }, - "arrivalTime": "2016-10-23T07:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "ORD", - "name": "O Hare International" - }, - "departureDay": "2016-10-22", - "departureTime": "2016-10-22T15:25:00", - "flightNumber": "7421" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "John Doe" - } - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "AB", - "name": "Air Berlin" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Tegel" - }, - "arrivalTime": "2016-10-23T07:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "ORD", - "name": "O Hare International" - }, - "departureDay": "2016-10-22", - "departureTime": "2016-10-22T15:25:00", - "flightNumber": "7421" - }, - "reservationNumber": "XXX007", - "underName": { - "@type": "Person", - "name": "Jane Doe" - } - } -] diff --git a/autotests/unstructureddata/iberia_1.json b/autotests/unstructureddata/iberia_1.json deleted file mode 100644 index 19ff6f4..0000000 --- a/autotests/unstructureddata/iberia_1.json +++ /dev/null @@ -1,109 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "IB", - "name": "Iberia Express" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "MAD", - "name": "Madrid" - }, - "arrivalTime": "2018-07-20T15:35:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Berlin" - }, - "departureDay": "2018-07-20", - "departureTime": "2018-07-20T12:25:00", - "flightNumber": "3677" - }, - "reservationNumber": "XXX007" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "IB", - "name": "Iberia Regional Air Nostrum" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "LEI", - "name": "Almeria" - }, - "arrivalTime": "2018-07-20T19:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "MAD", - "name": "Madrid" - }, - "departureDay": "2018-07-20", - "departureTime": "2018-07-20T17:50:00", - "flightNumber": "8588" - }, - "reservationNumber": "XXX007" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "IB", - "name": "Iberia Regional Air Nostrum" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "MAD", - "name": "Madrid" - }, - "arrivalTime": "2018-07-28T08:25:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "LEI", - "name": "Almeria" - }, - "departureDay": "2018-07-28", - "departureTime": "2018-07-28T07:15:00", - "flightNumber": "8603" - }, - "reservationNumber": "XXX007" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "IB" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Berlin" - }, - "arrivalTime": "2018-07-28T15:00:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "MAD", - "name": "Madrid" - }, - "departureDay": "2018-07-28", - "departureTime": "2018-07-28T12:00:00", - "flightNumber": "3186" - }, - "reservationNumber": "XXX007" - } -] diff --git a/autotests/unstructureddata/swiss_one-leg-return.json b/autotests/unstructureddata/swiss_one-leg-return.json deleted file mode 100644 index 47c07ab..0000000 --- a/autotests/unstructureddata/swiss_one-leg-return.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "LX" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "ZRH", - "name": "Zürich" - }, - "arrivalTime": "2017-09-10T08:15:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Berlin" - }, - "departureDay": "2017-09-10", - "departureTime": "2017-09-10T06:45:00", - "flightNumber": "963" - }, - "reservationNumber": "XXX007" - }, - { - "@context": "http://schema.org", - "@type": "FlightReservation", - "reservationFor": { - "@type": "Flight", - "airline": { - "@type": "Airline", - "iataCode": "LX" - }, - "arrivalAirport": { - "@type": "Airport", - "iataCode": "TXL", - "name": "Berlin" - }, - "arrivalTime": "2017-09-15T22:15:00", - "departureAirport": { - "@type": "Airport", - "iataCode": "ZRH", - "name": "Zürich" - }, - "departureDay": "2017-09-15", - "departureTime": "2017-09-15T20:50:00", - "flightNumber": "962" - }, - "reservationNumber": "XXX007" - } -] diff --git a/autotests/unstructureddataextractortest.cpp b/autotests/unstructureddataextractortest.cpp deleted file mode 100644 index b8ab028..0000000 --- a/autotests/unstructureddataextractortest.cpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - Copyright (c) 2017 Volker Krause - - This library is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public License as published by - the Free Software Foundation; either version 2 of the License, or (at your - option) any later version. - - This library is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public - License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to the - Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. -*/ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace KItinerary; - -class UnstructuredDataExtractorTest : public QObject -{ - Q_OBJECT -private: - bool loadExtractor(Extractor &extractor, const QString &extractorName, const QString &type) - { - QFile f(QLatin1String(":/org.kde.pim/kitinerary/extractors/") + extractorName + QLatin1String(".json")); - if (!f.open(QFile::ReadOnly)) { - return false; - } - const auto doc = QJsonDocument::fromJson(f.readAll()); - if (doc.isObject()) { - return extractor.load(doc.object(), QLatin1String(":/org.kde.pim/kitinerary/extractors/")); - } else if (doc.isArray()) { - for (const auto &v : doc.array()) { - if (v.toObject().value(QLatin1String("type")).toString(QStringLiteral("text")) == type) { - return extractor.load(v.toObject(), QLatin1String(":/org.kde.pim/kitinerary/extractors/")); - } - } - } - return false; - } - -private Q_SLOTS: - void initTestCase() - { - // use some exotic locale to ensure the date/time parsing doesn't just work by luck - QLocale::setDefault(QLocale(QStringLiteral("fr_FR"))); - - // make sure the qrc data is loaded in static builds - ExtractorRepository repo; - } - - void testExtractText_data() - { - QTest::addColumn("inputFile"); - QTest::addColumn("extractorName"); - QTest::addColumn("jsonFile"); - - QDir dir(QStringLiteral(SOURCE_DIR "/unstructureddata")); - const auto lst = dir.entryList(QStringList(QStringLiteral("*.txt")), QDir::Files | QDir::Readable | QDir::NoSymLinks); - for (const auto &file : lst) { - const QString refFile = dir.path() + QLatin1Char('/') + file.left(file.size() - 4) + QStringLiteral(".json"); - if (!QFile::exists(refFile)) { - qDebug() << "reference file" << refFile << "does not exist, skipping test file" << file; - continue; - } - const auto idx = file.indexOf(QLatin1Char('_')); - QTest::newRow(file.toLatin1().constData()) << QString(dir.path() + QLatin1Char('/') + file) << file.left(idx) << refFile; - } - } - - void testExtractText() - { - QFETCH(QString, inputFile); - QFETCH(QString, extractorName); - QFETCH(QString, jsonFile); - - QFile f(inputFile); - QVERIFY(f.open(QFile::ReadOnly)); - - Extractor extractor; - QVERIFY(loadExtractor(extractor, extractorName, QLatin1String("text"))); - - ExtractorEngine engine; - engine.setText(QString::fromUtf8(f.readAll())); - engine.setSenderDate(QDateTime(QDate(2017, 12, 29), QTime(18, 46, 2))); - engine.setExtractors({&extractor}); - const auto data = JsonLdDocument::toJson(JsonLdDocument::fromJson(engine.extract())); - - QFile ref(jsonFile); - QVERIFY(ref.open(QFile::ReadOnly)); - const auto doc = QJsonDocument::fromJson(ref.readAll()); - QVERIFY(doc.isArray()); - - if (data != doc.array()) { - qDebug().noquote() << QJsonDocument(data).toJson(); - } - QCOMPARE(data, doc.array()); - } - - void testExtractHtml_data() - { - QTest::addColumn("inputFile"); - QTest::addColumn("extractorName"); - QTest::addColumn("jsonFile"); - - QDir dir(QStringLiteral(SOURCE_DIR "/unstructureddata")); - const auto lst = dir.entryList(QStringList(QStringLiteral("*.html")), QDir::Files | QDir::Readable | QDir::NoSymLinks); - for (const auto &file : lst) { - const QString refFile = dir.path() + QLatin1Char('/') + file.left(file.size() - 5) + QStringLiteral(".json"); - if (!QFile::exists(refFile)) { - qDebug() << "reference file" << refFile << "does not exist, skipping test file" << file; - continue; - } - const auto idx = file.indexOf(QLatin1Char('_')); - QTest::newRow(file.toLatin1().constData()) << QString(dir.path() + QLatin1Char('/') + file) << file.left(idx) << refFile; - } - } - - void testExtractHtml() - { - QFETCH(QString, inputFile); - QFETCH(QString, extractorName); - QFETCH(QString, jsonFile); - - QFile f(inputFile); - QVERIFY(f.open(QFile::ReadOnly)); - std::unique_ptr htmlDoc(HtmlDocument::fromData(f.readAll())); - QVERIFY(htmlDoc); - - Extractor extractor; - QVERIFY(loadExtractor(extractor, extractorName, QLatin1String("html"))); - - ExtractorEngine engine; - engine.setSenderDate(QDateTime(QDate(2017, 12, 29), QTime(18, 46, 2))); - engine.setExtractors({&extractor}); - engine.setHtmlDocument(htmlDoc.get()); - const auto data = JsonLdDocument::toJson(JsonLdDocument::fromJson(engine.extract())); - - QFile ref(jsonFile); - QVERIFY(ref.open(QFile::ReadOnly)); - const auto doc = QJsonDocument::fromJson(ref.readAll()); - QVERIFY(doc.isArray()); - - if (data != doc.array()) { - qDebug().noquote() << QJsonDocument(data).toJson(); - } - QCOMPARE(data, doc.array()); - } -}; - -QTEST_MAIN(UnstructuredDataExtractorTest) - -#include "unstructureddataextractortest.moc"