diff --git a/src/extractorpostprocessor.cpp b/src/extractorpostprocessor.cpp index 7667f7a..07fa16e 100644 --- a/src/extractorpostprocessor.cpp +++ b/src/extractorpostprocessor.cpp @@ -1,602 +1,609 @@ /* Copyright (c) 2017 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config-kitinerary.h" #include "extractorpostprocessor.h" #include "extractorpostprocessor_p.h" #include "flightpostprocessor_p.h" #include "extractorutil.h" #include "iatabcbpparser.h" #include "jsonlddocument.h" #include "logging.h" #include "mergeutil.h" #include "sortutil.h" #include "knowledgedb/trainstationdb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_PHONENUMBER #include #endif #include using namespace KItinerary; ExtractorPostprocessor::ExtractorPostprocessor() : d(new ExtractorPostprocessorPrivate) { } ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default; ExtractorPostprocessor::~ExtractorPostprocessor() = default; void ExtractorPostprocessor::process(const QVector &data) { d->m_resultFinalized = false; d->m_data.reserve(d->m_data.size() + data.size()); for (auto elem : data) { if (JsonLd::isA(elem)) { elem = d->processFlightReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processTrainReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processLodgingReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processFoodEstablishmentReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processTouristAttractionVisit(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processBusReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processEventReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processRentalCarReservation(elem.value()); } else if (JsonLd::isA(elem)) { elem = d->processTaxiReservation(elem.value()); } d->mergeOrAppend(elem); } } QVector ExtractorPostprocessor::result() const { if (!d->m_resultFinalized) { for (auto it = d->m_data.begin(); it != d->m_data.end();) { if (d->filterReservation(*it)) { ++it; } else { //qCDebug(Log).noquote() << "Discarding element:" << QJsonDocument(JsonLdDocument::toJson({*it})).toJson(); it = d->m_data.erase(it); } } d->m_resultFinalized = true; } std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore); return d->m_data; } void ExtractorPostprocessor::setContextDate(const QDateTime& dt) { d->m_contextDate = dt; } void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem) { const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) { return MergeUtil::isSame(elem, other); }); if (it == m_data.end()) { m_data.push_back(elem); } else { *it = MergeUtil::merge(*it, elem); } } QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const { // expand ticketToken for IATA BCBP data const auto bcbp = res.reservedTicket().value().ticketTokenData(); if (!bcbp.isEmpty()) { const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate.date()); if (bcbpData.size() == 1) { res = JsonLdDocument::apply(bcbpData.at(0), res).value(); } else { for (const auto &data : bcbpData) { if (MergeUtil::isSame(res, data)) { res = JsonLdDocument::apply(data, res).value(); break; } } } } FlightPostProcessor p; res.setReservationFor(p.processFlight(res.reservationFor().value())); return processReservation(res); } TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const { res.setReservationFor(processTrainTrip(res.reservationFor().value())); return processReservation(res); } TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const { trip.setArrivalPlatform(trip.arrivalPlatform().trimmed()); trip.setDeparturePlatform(trip.departurePlatform().trimmed()); trip.setDepartureStation(processTrainStation(trip.departureStation())); trip.setArrivalStation(processTrainStation(trip.arrivalStation())); trip.setDepartureTime(processTrainTripTime(trip.departureTime(), trip.departureStation())); trip.setArrivalTime(processTrainTripTime(trip.arrivalTime(), trip.arrivalStation())); trip.setTrainNumber(trip.trainNumber().simplified()); trip.setTrainName(trip.trainName().simplified()); return trip; } static void applyStationData(const KnowledgeDb::TrainStation &record, TrainStation &station) { if (!station.geo().isValid() && record.coordinate.isValid()) { GeoCoordinates geo; geo.setLatitude(record.coordinate.latitude); geo.setLongitude(record.coordinate.longitude); station.setGeo(geo); } auto addr = station.address(); if (addr.addressCountry().isEmpty() && record.country.isValid()) { addr.setAddressCountry(record.country.toString()); station.setAddress(addr); } } static void applyStationCountry(const QString &isoCode, TrainStation &station) { auto addr = station.address(); if (addr.addressCountry().isEmpty()) { addr.setAddressCountry(isoCode.toUpper()); station.setAddress(addr); } } TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation station) const { const auto id = station.identifier(); if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output station.setIdentifier(QString()); } else if (id.startsWith(QLatin1String("sncf:")) && id.size() == 10) { const auto record = KnowledgeDb::stationForGaresConnexionsId(KnowledgeDb::GaresConnexionsId{id.mid(5)}); applyStationData(record, station); applyStationCountry(id.mid(5, 2).toUpper(), station); } else if (id.startsWith(QLatin1String("ibnr:")) && id.size() == 12) { const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()}); applyStationData(record, station); const auto country = KnowledgeDb::countryIdForUicCode(id.midRef(5, 2).toUShort()).toString(); applyStationCountry(country, station); } else if (id.startsWith(QLatin1String("uic:")) && id.size() == 11) { const auto record = KnowledgeDb::stationForUic(KnowledgeDb::UICStation{id.mid(4).toUInt()}); applyStationData(record, station); const auto country = KnowledgeDb::countryIdForUicCode(id.midRef(4, 2).toUShort()).toString(); applyStationCountry(country, station); } return processPlace(station); } QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, const TrainStation& station) const { if (!dt.isValid()) { return dt; } if (dt.timeSpec() == Qt::TimeZone) { return dt; } QTimeZone tz; if (station.identifier().startsWith(QLatin1String("sncf:"))) { const auto record = KnowledgeDb::stationForGaresConnexionsId(KnowledgeDb::GaresConnexionsId{station.identifier().mid(5)}); tz = record.timezone.toQTimeZone(); } else if (station.identifier().startsWith(QLatin1String("ibnr:"))) { const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{station.identifier().mid(5).toUInt()}); tz = record.timezone.toQTimeZone(); } else if (!station.address().addressCountry().isEmpty()) { tz = KnowledgeDb::timezoneForCountry(KnowledgeDb::CountryId{station.address().addressCountry()}).toQTimeZone(); } if (!tz.isValid()) { return dt; } // prefer our timezone over externally provided UTC offset, if they match if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) { return dt; } if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) { dt.setTimeSpec(Qt::TimeZone); dt.setTimeZone(tz); } else if (dt.timeSpec() == Qt::UTC) { dt = dt.toTimeZone(tz); } return dt; } BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const { res.setReservationFor(processBusTrip(res.reservationFor().value())); return processReservation(res); } BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const { trip.setDepartureBusStop(processPlace(trip.departureBusStop())); trip.setArrivalBusStop(processPlace(trip.arrivalBusStop())); trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBusStop())); trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBusStop())); trip.setBusNumber(trip.busNumber().simplified()); trip.setBusName(trip.busName().simplified()); return trip; } LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const { res.setReservationFor(processPlace(res.reservationFor().value())); res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value())); res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value())); return processReservation(res); } TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const { res.setPickupLocation(processPlace(res.pickupLocation())); res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation())); return processReservation(res); } RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const { res.setReservationFor(processRentalCar(res.reservationFor().value())); res.setPickupLocation(processPlace(res.pickupLocation())); res.setDropoffLocation(processPlace(res.dropoffLocation())); res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation())); res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation())); return processReservation(res); } RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const { car.setName(car.name().trimmed()); return car; } FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const { res.setReservationFor(processPlace(res.reservationFor().value())); res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value())); res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value())); return processReservation(res); } TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const { visit.setTouristAttraction(processPlace(visit.touristAttraction())); visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction())); visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction())); return visit; } EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const { res.setReservationFor(processEvent(res.reservationFor().value())); return processReservation(res); } Event ExtractorPostprocessorPrivate::processEvent(Event event) const { // normalize location to be a Place if (JsonLd::isA(event.location())) { Place place; place.setAddress(event.location().value()); event.setLocation(place); } if (JsonLd::isA(event.location())) { event.setLocation(processPlace(event.location().value())); // try to obtain timezones if we have a location event.setStartDate(processTimeForLocation(event.startDate(), event.location().value())); event.setEndDate(processTimeForLocation(event.endDate(), event.location().value())); event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value())); } return event; } template T ExtractorPostprocessorPrivate::processReservation(T res) const { res.setUnderName(processPerson(res.underName().template value())); res.setPotentialAction(processActions(res.potentialAction())); return res; } Person ExtractorPostprocessorPrivate::processPerson(Person person) const { person.setName(person.name().simplified()); if (person.name().isEmpty() && !person.familyName().isEmpty() && !person.givenName().isEmpty()) { person.setName(person.givenName() + QLatin1Char(' ') + person.familyName()); } // strip prefixes, they break comparisons static const char* const honorificPrefixes[] = { "MR ", "MS ", "MRS " }; for (auto prefix : honorificPrefixes) { if (person.name().startsWith(QLatin1String(prefix), Qt::CaseInsensitive)) { person.setName(person.name().mid(strlen(prefix))); break; } } return person; } PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber) { // convert to ISO 3166-1 alpha-2 country codes if (addr.addressCountry().size() > 2) { const auto isoCode = KContacts::Address::countryToISO(addr.addressCountry()).toUpper(); if (!isoCode.isEmpty()) { addr.setAddressCountry(isoCode); + + // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus + } else if (addr.addressCountry().size() == 3) { + const auto c = KnowledgeDb::countryIdFromIso3166_1alpha3(KnowledgeDb::CountryId3(addr.addressCountry())); + if (c.isValid()) { + addr.setAddressCountry(c.toString()); + } } } // upper case country codes if (addr.addressCountry().size() == 2) { addr.setAddressCountry(addr.addressCountry().toUpper()); } // normalize strings addr.setStreetAddress(addr.streetAddress().simplified()); addr.setAddressLocality(addr.addressLocality().simplified()); addr.setAddressRegion(addr.addressRegion().simplified()); #ifdef HAVE_PHONENUMBER // recover country from phone number, if we have that if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) { const auto phoneStr = phoneNumber.toStdString(); const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance(); i18n::phonenumbers::PhoneNumber number; if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) { std::string isoCode; util->GetRegionCodeForNumber(number, &isoCode); if (!isoCode.empty()) { addr.setAddressCountry(QString::fromStdString(isoCode)); } } } #endif addr = ExtractorUtil::extractPostalCode(addr); return addr; } QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr) { #ifdef HAVE_PHONENUMBER // or complete the phone number if we know the country if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) { auto phoneStr = phoneNumber.toStdString(); const auto isoCode = addr.addressCountry().toStdString(); const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance(); i18n::phonenumbers::PhoneNumber number; if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) { if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) { util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr); return QString::fromStdString(phoneStr); } } } #else Q_UNUSED(addr); #endif return phoneNumber; } QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const { // remove non-actions and actions with invalid URLs QUrl viewUrl; for (auto it = actions.begin(); it != actions.end();) { if (!JsonLd::canConvert(*it)) { it = actions.erase(it); continue; } const auto action = JsonLd::convert(*it); if (!action.target().isValid()) { it = actions.erase(it); continue; } if (JsonLd::isA(*it)) { viewUrl = action.target(); } ++it; } // normalize the order, so JSON comparison still yields correct results std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) { return strcmp(lhs.typeName(), rhs.typeName()) < 0; }); // remove actions that don't actually have their own target, or duplicates QUrl prevUrl; const char* prevType = nullptr; for (auto it = actions.begin(); it != actions.end();) { const auto action = JsonLd::convert(*it); const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false); if ((JsonLd::isA(*it) || action.target() != viewUrl) && !isDuplicate) { prevUrl = action.target(); prevType = (*it).typeName(); ++it; } else { it = actions.erase(it); } } return actions; } template QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const { if (!dt.isValid() || dt.timeSpec() == Qt::TimeZone) { return dt; } QTimeZone tz; if (!place.address().addressCountry().isEmpty()) { tz = KnowledgeDb::timezoneForCountry(KnowledgeDb::CountryId{place.address().addressCountry()}).toQTimeZone(); } if (!tz.isValid()) { return dt; } // prefer our timezone over externally provided UTC offset, if they match if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) { qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt); return dt; } if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) { dt.setTimeSpec(Qt::TimeZone); dt.setTimeZone(tz); } else if (dt.timeSpec() == Qt::UTC) { dt = dt.toTimeZone(tz); } return dt; } bool ExtractorPostprocessorPrivate::filterReservation(const QVariant &res) const { if (JsonLd::isA(res)) { return filterFlight(res.value().reservationFor().value()); } if (JsonLd::isA(res)) { return filterTrainTrip(res.value().reservationFor().value()); } if (JsonLd::isA(res)) { return filterBusTrip(res.value().reservationFor().value()); } if (JsonLd::isA(res)) { return filterLodgingReservation(res.value()); } if (JsonLd::isA(res)) { return filterEventReservation(res.value()); } if (JsonLd::isA(res)) { return filterFoodReservation(res.value()); } // types without specific filters yet if (JsonLd::isA(res) || JsonLd::isA(res) || JsonLd::isA(res)) { return true; } // unknown top-level type return false; } bool ExtractorPostprocessorPrivate::filterLodgingReservation(const LodgingReservation &res) const { return res.checkinTime().isValid() && res.checkoutTime().isValid(); } bool ExtractorPostprocessorPrivate::filterFlight(const Flight &flight) const { // this will be valid if either boarding time, departure time or departure day is set const auto validDate = flight.departureDay().isValid(); return filterAirport(flight.departureAirport()) && filterAirport(flight.arrivalAirport()) && validDate; } bool ExtractorPostprocessorPrivate::filterAirport(const Airport &airport) const { return !airport.iataCode().isEmpty() || !airport.name().isEmpty(); } bool ExtractorPostprocessorPrivate::filterTrainTrip(const TrainTrip &trip) const { return filterTrainOrBusStation(trip.departureStation()) && filterTrainOrBusStation(trip.arrivalStation()) && trip.departureDay().isValid(); } bool ExtractorPostprocessorPrivate::filterBusTrip(const BusTrip &trip) const { return filterTrainOrBusStation(trip.departureBusStop()) && filterTrainOrBusStation(trip.arrivalBusStop()) && trip.departureTime().isValid() && trip.arrivalTime().isValid(); } template bool ExtractorPostprocessorPrivate::filterTrainOrBusStation(const T &station) const { return !station.name().isEmpty(); } bool ExtractorPostprocessorPrivate::filterEventReservation(const EventReservation &res) const { const auto event = res.reservationFor().value(); return !event.name().isEmpty() && event.startDate().isValid(); } bool ExtractorPostprocessorPrivate::filterFoodReservation(const FoodEstablishmentReservation &res) const { return res.startTime().isValid(); }