diff --git a/src/jsapi/jsonld.cpp b/src/jsapi/jsonld.cpp
index b0710f0..c5d66ca 100644
--- a/src/jsapi/jsonld.cpp
+++ b/src/jsapi/jsonld.cpp
@@ -1,265 +1,265 @@
/*
   Copyright (C) 2018 Volker Krause

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published by
   the Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see .
*/

#include "jsonld.h"

// NOTE(review): the targets of the following #include directives are missing in
// this copy of the patch; restore them from the upstream file before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include

using namespace KItinerary;

// Stores the script engine that all factory and conversion methods below use,
// and passes it on to the QObject base constructor.
JsApi::JsonLd::JsonLd(QJSEngine* engine)
    : QObject(engine)
    , m_engine(engine)
{
}

JsApi::JsonLd::~JsonLd() = default;

// Creates a new script object whose "@type" property is set to typeName.
QJSValue JsApi::JsonLd::newObject(const QString &typeName) const
{
    auto v = m_engine->newObject();
    v.setProperty(QStringLiteral("@type"), typeName);
    return v;
}

// Creates an object of the given type that already carries an "address"
// (PostalAddress) and a "geo" (GeoCoordinates) sub-object.
QJSValue JsApi::JsonLd::newPlace(const QString &type) const
{
    const auto addr = newObject(QStringLiteral("PostalAddress"));
    const auto geo = newObject(QStringLiteral("GeoCoordinates"));

    auto p = newObject(type);
    p.setProperty(QStringLiteral("address"), addr);
    p.setProperty(QStringLiteral("geo"), geo);

    return p;
}

// Creates a FlightReservation skeleton:
//   reservationFor: Flight { departureAirport: Airport, arrivalAirport: Airport, airline: Airline }
//   underName: Person
//   reservedTicket: Ticket
QJSValue JsApi::JsonLd::newFlightReservation() const
{
    const auto dep = newObject(QStringLiteral("Airport"));
    const auto arr = newObject(QStringLiteral("Airport"));
    const auto airline = newObject(QStringLiteral("Airline"));
    const auto person = newObject(QStringLiteral("Person"));

    auto resFor = newObject(QStringLiteral("Flight"));
    resFor.setProperty(QStringLiteral("departureAirport"), dep);
    resFor.setProperty(QStringLiteral("arrivalAirport"), arr);
    resFor.setProperty(QStringLiteral("airline"), airline);

    const auto ticket = newObject(QStringLiteral("Ticket"));

    auto res = newObject(QStringLiteral("FlightReservation"));
    res.setProperty(QStringLiteral("reservationFor"), resFor);
    res.setProperty(QStringLiteral("underName"), person);
    res.setProperty(QStringLiteral("reservedTicket"), ticket);

    return res;
}

// Creates a TrainReservation skeleton:
//   reservationFor: TrainTrip { departureStation: TrainStation, arrivalStation: TrainStation }
//   underName: Person
//   reservedTicket: Ticket { ticketedSeat: Seat }
QJSValue JsApi::JsonLd::newTrainReservation() const
{
    const auto dep = newObject(QStringLiteral("TrainStation"));
    const auto arr = newObject(QStringLiteral("TrainStation"));
    const auto person = newObject(QStringLiteral("Person"));
    const auto seat = newObject(QStringLiteral("Seat"));

    auto ticket = newObject(QStringLiteral("Ticket"));
    ticket.setProperty(QStringLiteral("ticketedSeat"), seat);

    auto resFor = newObject(QStringLiteral("TrainTrip"));
    resFor.setProperty(QStringLiteral("departureStation"), dep);
    resFor.setProperty(QStringLiteral("arrivalStation"), arr);

    auto res = newObject(QStringLiteral("TrainReservation"));
    res.setProperty(QStringLiteral("reservationFor"), resFor);
    res.setProperty(QStringLiteral("underName"), person);
    res.setProperty(QStringLiteral("reservedTicket"), ticket);

    return res;
}

// Creates a BusReservation skeleton:
//   reservationFor: BusTrip { departureBusStop, arrivalBusStop }
//   underName: Person
//   reservedTicket: Ticket
// Unlike the flight and train variants, both stops are built with newPlace(),
// so they come with "address" and "geo" sub-objects.
QJSValue JsApi::JsonLd::newBusReservation() const
{
    const auto dep = newPlace(QStringLiteral("BusStation"));
    const auto arr = newPlace(QStringLiteral("BusStation"));
    const auto person = newObject(QStringLiteral("Person"));
    const auto ticket = newObject(QStringLiteral("Ticket"));

    auto resFor = newObject(QStringLiteral("BusTrip"));
    resFor.setProperty(QStringLiteral("departureBusStop"), dep);
    resFor.setProperty(QStringLiteral("arrivalBusStop"), arr);

    auto res = newObject(QStringLiteral("BusReservation"));
    res.setProperty(QStringLiteral("reservationFor"), resFor);
    res.setProperty(QStringLiteral("underName"), person);
    res.setProperty(QStringLiteral("reservedTicket"), ticket);

    return res;
}

// Creates a LodgingReservation skeleton:
//   reservationFor: LodgingBusiness (built with newPlace(), i.e. with "address" and "geo")
//   underName: Person
// No ticket sub-object is created here.
QJSValue JsApi::JsonLd::newLodgingReservation() const
{
    const auto person = newObject(QStringLiteral("Person"));
    const auto resFor = newPlace(QStringLiteral("LodgingBusiness"));

    auto res = newObject(QStringLiteral("LodgingReservation"));
    res.setProperty(QStringLiteral("reservationFor"), resFor);
    res.setProperty(QStringLiteral("underName"), person);

    return res;
}

// Creates an EventReservation skeleton:
//   reservationFor: Event { location: Place (built with newPlace()) }
//   reservedTicket: Ticket
// No "underName" property is set here.
QJSValue KItinerary::JsApi::JsonLd::newEventReservation() const
{
    auto resFor = newObject(QStringLiteral("Event"));
    resFor.setProperty(QStringLiteral("location"), newPlace(QStringLiteral("Place")));

    auto res = newObject(QStringLiteral("EventReservation"));
    res.setProperty(QStringLiteral("reservationFor"), resFor);

    const auto ticket = newObject(QStringLiteral("Ticket"));
    res.setProperty(QStringLiteral("reservedTicket"), ticket);

    return res;
}

// Parses dtStr according to format, using the QLocale constructed from localeName.
//
// If the direct parse fails and the format contains "MMM", two fallbacks rewrite
// the month token of the input and retry. If every attempt fails, the invalid
// QDateTime is returned unchanged.
//
// On success the date part is adjusted by the first rule that applies:
//  - the format has no day, month or year specifier: the date is set to 1970-01-01;
//  - the format has no year and a valid context date is set: the year is taken
//    from the context date, plus one year if the result would lie before it;
//  - the format has no four-digit year and the parsed year is 19xx: the result
//    is shifted forward by 100 years.
QDateTime JsApi::JsonLd::toDateTime(const QString &dtStr, const QString &format, const QString &localeName) const
{
    QLocale locale(localeName);
    auto dt = locale.toDateTime(dtStr, format);

    // try harder for the "MMM" month format
    // QLocale expects the exact string in QLocale::shortMonthName(), while we often encounter a three
    // letter month identifier. For en_US that's the same, for Swedish it isn't though for example. So
    // let's try to fix up the month identifiers to the full short name.
    if (!dt.isValid() && format.contains(QLatin1String("MMM"))) {
        auto dtStrFixed = dtStr;
        for (int i = 1; i <= 12; ++i) {
            const auto monthName = locale.monthName(i, QLocale::ShortFormat);
            // replaces every case-insensitive occurrence of the first three characters
            // of the short month name with the complete short month name
            dtStrFixed.replace(monthName.left(3), monthName, Qt::CaseInsensitive);
        }
        dt = locale.toDateTime(dtStrFixed, format);
    }

    // try even harder for "MMM" month format
    // in the de_DE locale we encounter sometimes almost arbitrary abbreviations for month
    // names (eg. Mrz, Mär for März). So try to identify those and replace them with what QLocale
    // expects
    if (!dt.isValid() && format.contains(QLatin1String("MMM"))) {
        // starts again from the original input, not from the result of the first fallback
        auto dtStrFixed = dtStr;
        for (int i = 1; i <= 12; ++i) {
            const auto monthName = locale.monthName(i, QLocale::LongFormat);
            // candidate start: first occurrence of the long name's first character
            // (indexOf is called with its default case sensitivity)
            const auto beginIdx = dtStr.indexOf(monthName.at(0));
            if (beginIdx < 0) {
                continue;
            }
            // Walk the long month name once; endIdx only advances when the current input
            // character matches case-insensitively. The input is thus matched as a
            // subsequence of the long name, e.g. "Mrz" against "März".
            auto endIdx = beginIdx;
            for (auto nameIdx = 0; endIdx < dtStr.size() && nameIdx < monthName.size(); ++nameIdx) {
                if (dtStr.at(endIdx).toCaseFolded() == monthName.at(nameIdx).toCaseFolded()) {
                    ++endIdx;
                }
            }
            // accept the first month with at least three matched characters
            if (endIdx - beginIdx >= 3) {
                dtStrFixed.replace(beginIdx, endIdx - beginIdx, locale.monthName(i, QLocale::ShortFormat));
                break;
            }
        }
        dt = locale.toDateTime(dtStrFixed, format);
    }

    if (!dt.isValid()) {
        return dt;
    }

    // which date components the format string asks for
    const bool hasFullYear = format.contains(QLatin1String("yyyy"));
    const bool hasYear = hasFullYear || format.contains(QLatin1String("yy"));
-    const bool hasMonth = format.contains(QLatin1String("M"));
-    const bool hasDay = format.contains(QLatin1String("d"));
+    const bool hasMonth = format.contains(QLatin1Char('M'));
+    const bool hasDay = format.contains(QLatin1Char('d'));

    // time only, set a default date
    if (!hasDay && !hasMonth && !hasYear) {
        dt.setDate({1970, 1, 1});
    }

    // if the date does not contain a year number, determine that based on the context date, if set
    else if (!hasYear && m_contextDate.isValid()) {
        dt.setDate({m_contextDate.date().year(), dt.date().month(), dt.date().day()});
        // a date before the context date is moved into the following year
        if (dt < m_contextDate) {
            dt = dt.addYears(1);
        }
    }

    // fix two-digit years ending up in the wrong century
    // (this branch is also reached when the format has no year and no context date is set)
    else if (!hasFullYear && dt.date().year() / 100 == 19) {
        dt = dt.addYears(100);
    }

    return dt;
}

// Converts v to JSON via JsonLdDocument::toJson() and wraps the result in a script value.
// NOTE(review): the template argument of canConvert/value is missing in this copy
// ("canConvert>()"); presumably a list type, to be confirmed against upstream.
QJSValue JsApi::JsonLd::toJson(const QVariant &v) const
{
    if (v.canConvert>()) {
        return m_engine->toScriptValue(JsonLdDocument::toJson(v.value>()));
    }

    const auto json = JsonLdDocument::toJson(v);
    return m_engine->toScriptValue(json);
}

// Returns a new script value built by converting v to a QVariant and back.
QJSValue JsApi::JsonLd::clone(const QJSValue& v) const
{
    return m_engine->toScriptValue(v.toVariant());
}

// Extracts coordinates from a map URL. Only URLs whose host contains "google"
// are considered. Returns a GeoCoordinates object with "latitude" and
// "longitude" on a match, and a default-constructed QJSValue otherwise.
QJSValue JsApi::JsonLd::toGeoCoordinates(const QString &mapUrl)
{
    QUrl url(mapUrl);
    if (url.host().contains(QLatin1String("google"))) {
        // '/' or '=' followed by two comma-separated decimal numbers, each with an optional sign
        QRegularExpression regExp(QStringLiteral("[/=](-?\\d+\\.\\d+),(-?\\d+\\.\\d+)"));
        // look in the URL path first, then fall back to the query string
        auto match = regExp.match(url.path());
        if (!match.hasMatch()) {
            match = regExp.match(url.query());
        }

        if (match.hasMatch()) {
            auto geo = m_engine->newObject();
            geo.setProperty(QStringLiteral("@type"), QStringLiteral("GeoCoordinates"));
            geo.setProperty(QStringLiteral("latitude"), match.capturedRef(1).toDouble());
            geo.setProperty(QStringLiteral("longitude"), match.capturedRef(2).toDouble());
            return geo;
        }
    }

    return {};
}

// Sets the context date that toDateTime() uses to fill in a missing year.
void JsApi::JsonLd::setContextDate(const QDateTime& dt)
{
    m_contextDate = dt;
}

#include "moc_jsonld.cpp"
diff --git a/src/structureddataextractor.cpp b/src/structureddataextractor.cpp
index 8b96abd..c9b1e70 100644
--- a/src/structureddataextractor.cpp
+++ b/src/structureddataextractor.cpp
@@ -1,162 +1,162 @@
/*
   Copyright (c) 2017 Volker Krause

   This library is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published by
   the Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This library is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
   License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB. If not, write to the
   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.
*/

#include "structureddataextractor_p.h"
#include "htmldocument.h"
#include "logging.h"

// NOTE(review): the targets of the following #include directives are missing in
// this copy of the patch; restore them from the upstream file before building.
#include
#include
#include
#include
#include

using namespace KItinerary;

// Returns a copy of data with a comma inserted between every directly adjacent
// "}{" pair. The replacement is purely textual and applies to the whole buffer.
static QByteArray fixupJson(const QByteArray &data)
{
    auto output(data);

    // Eurowings doesn't put a comma between objects in top-level arrays...
    output.replace("}{", "},{");

    return output;
}

// Parses data as JSON and appends the outcome to result: every element of a
// top-level array, or a top-level object as a single element. If parsing
// fails, it is retried once on the output of fixupJson(). If that fails too,
// the input and the error are logged and result is left untouched.
static void parseJson(const QByteArray &data, QJsonArray &result)
{
    QJsonParseError error;
    auto jsonDoc = QJsonDocument::fromJson(data, &error);
    if (jsonDoc.isNull()) {
        if (error.error != QJsonParseError::NoError) {
            // try to fix up common JSON encoding errors
            jsonDoc = QJsonDocument::fromJson(fixupJson(data));
        }
        if (jsonDoc.isNull()) {
            // the retry is made without an error object, so the error logged here
            // is the one from the first parse attempt
            qCDebug(Log).noquote() << data;
            qCDebug(Log) << error.errorString() << "at offset" << error.offset;
            return;
        }
    }
    if (jsonDoc.isArray()) {
        const auto jsonArray = jsonDoc.array();
        std::copy(jsonArray.begin(), jsonArray.end(), std::back_inserter(result));
    } else if (jsonDoc.isObject()) {
        result.push_back(jsonDoc.object());
    }
}

// Returns the Microdata property value of elem, chosen by element name:
//  - "meta": the "content" attribute
//  - "time": the "datetime" attribute
//  - "link" / "a": the "href" attribute if present, else the "content"
//    attribute if present, else the recursive text content
//  - anything else: the recursive text content
static QString valueForItemProperty(const HtmlElement &elem)
{
    // TODO see https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/itemprop#Values
    const auto elemName = elem.name();
    QString v;
    if (elemName == QLatin1String("meta")) {
        v = elem.attribute(QStringLiteral("content"));
    } else if (elemName == QLatin1String("time")) {
        v = elem.attribute(QStringLiteral("datetime"));
-    } else if (elemName == QLatin1String("link") || elemName == QLatin1String("a")) {
+    } else if (elemName == QLatin1String("link") || elemName == QLatin1Char('a')) {
        if (elem.hasAttribute(QStringLiteral("href"))) {
            v = elem.attribute(QStringLiteral("href"));
        } else if (elem.hasAttribute(QStringLiteral("content"))) {
            v = elem.attribute(QStringLiteral("content"));
        } else {
            v = elem.recursiveContent();
        }
    } else {
        v = elem.recursiveContent();
    }

    return v;
}

// Collects the Microdata properties found below elem into obj.
// For each direct child:
//  - with an "itemtype" starting with "http://schema.org/": the child is parsed
//    recursively into a sub-object, which gets "@type" set to the file name part
//    of the type URL and is inserted under the child's "itemprop" value;
//  - otherwise with a non-empty "itemprop": the value from valueForItemProperty()
//    is inserted under that name;
//  - otherwise: the child's own children are scanned into the same obj.
static void parseMicroData(const HtmlElement &elem, QJsonObject &obj)
{
    auto child = elem.firstChild();
    while (!child.isNull()) {
        const auto prop = child.attribute(QStringLiteral("itemprop"));
        const auto type = child.attribute(QStringLiteral("itemtype"));
        if (type.startsWith(QLatin1String("http://schema.org/"))) {
            QJsonObject subObj;
            parseMicroData(child, subObj);
            const QUrl typeUrl(type);
            subObj.insert(QStringLiteral("@type"), typeUrl.fileName());
            // NOTE(review): prop is not checked for emptiness in this branch, so a typed
            // child without "itemprop" is inserted under an empty key. Confirm this is intended.
            obj.insert(prop, subObj);
        } else if (!prop.isEmpty()) {
            obj.insert(prop, valueForItemProperty(child));
        } else {
            // skip intermediate nodes without Microdata annotations
            parseMicroData(child, obj);
        }
        child = child.nextSibling();
    }
}

// Walks the element tree below elem and appends every structured data object
// found to result. A JSON-LD script element or a schema.org Microdata item
// ends the descent at that element; all other elements are descended into.
static void extractRecursive(const HtmlElement &elem, QJsonArray &result)
{
    // JSON-LD
    if (elem.name() == QLatin1String("script") && elem.attribute(QStringLiteral("type")) == QLatin1String("application/ld+json")) {
        parseJson(elem.content().toUtf8(), result);
        return;
    }

    // Microdata
    const auto itemType = elem.attribute(QStringLiteral("itemtype"));
    if (itemType.startsWith(QLatin1String("http://schema.org/"))) {
        QJsonObject obj;
        parseMicroData(elem, obj);
        // items that yielded no properties are dropped
        if (obj.isEmpty()) {
            return;
        }

        const QUrl typeUrl(itemType);
        obj.insert(QStringLiteral("@type"), typeUrl.fileName());

        const auto itemProp = elem.attribute(QStringLiteral("itemprop"));
        if (!itemProp.isEmpty() && !result.isEmpty()) {
            // this is likely a child of our preceding sibling, but broken XML put it here
            // attach it to the most recently added result, which is updated in place
            auto parent = result.last().toObject();
            parent.insert(itemProp, obj);
            result[result.size() - 1] = parent;
        } else {
            // a top-level item: add the schema.org context and append it
            obj.insert(QStringLiteral("@context"), QStringLiteral("http://schema.org"));
            result.push_back(obj);
        }
        return;
    }

    // recurse otherwise
    auto child = elem.firstChild();
    while (!child.isNull()) {
        extractRecursive(child, result);
        child = child.nextSibling();
    }
}

// Extracts all JSON-LD and Microdata objects from doc.
// doc must not be null (asserted). Returns an empty array when the document
// has no root element.
QJsonArray StructuredDataExtractor::extract(HtmlDocument *doc)
{
    Q_ASSERT(doc);

    QJsonArray result;
    if (doc->root().isNull()) {
        return result;
    }
    extractRecursive(doc->root(), result);
    return result;
}