diff --git a/kdepim-addons.categories b/kdepim-addons.categories --- a/kdepim-addons.categories +++ b/kdepim-addons.categories @@ -25,4 +25,5 @@ org.kde.pim.imapfoldercompletion kdepim-addons (SieveEditor imap completion folder) org.kde.pim.regexpeditorlineedit kdepim-addons (SieveEditor Regexp Editor plugin) org.kde.pim.addresslocationeditorplugin kdepim-addons (kaddressbook contact editor) -org.kde.pim.pkpass ldepim-addons (pkpass formater) +org.kde.pim.pkpass kdepim-addons (pkpass formater) +org.kde.pim.messageviewer.semantic kdepim-addons (Semantic plugin) diff --git a/plugins/messageviewer/bodypartformatter/CMakeLists.txt b/plugins/messageviewer/bodypartformatter/CMakeLists.txt --- a/plugins/messageviewer/bodypartformatter/CMakeLists.txt +++ b/plugins/messageviewer/bodypartformatter/CMakeLists.txt @@ -6,3 +6,4 @@ add_subdirectory(calendar) add_subdirectory(gnupgwks) add_subdirectory(pkpass) +add_subdirectory(semantic) diff --git a/plugins/messageviewer/bodypartformatter/autotests/CMakeLists.txt b/plugins/messageviewer/bodypartformatter/autotests/CMakeLists.txt --- a/plugins/messageviewer/bodypartformatter/autotests/CMakeLists.txt +++ b/plugins/messageviewer/bodypartformatter/autotests/CMakeLists.txt @@ -42,3 +42,13 @@ endmacro () add_diff_bodyformatter_class_unittest(diffhighlightertest.cpp "../highlighter/highlighter.cpp") + +add_definitions(-DSOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}" ) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/../semantic/) +ecm_add_test( + structureddataextractortest.cpp + ${CMAKE_CURRENT_BINARY_DIR}/../semantic/semantic_debug.cpp + TEST_NAME structureddataextractortest + NAME_PREFIX "messageviewerplugins" + LINK_LIBRARIES Qt5::Test +) diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-json-ld.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-json-ld.html new file mode 100644 --- /dev/null +++ 
b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-json-ld.html @@ -0,0 +1,34 @@ + + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-json-ld.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-json-ld.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-json-ld.json @@ -0,0 +1,31 @@ +[{ + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationNumber": "RXJ34P", + "reservationStatus": "http://schema.org/Confirmed", + "underName": { + "@type": "Person", + "name": "Eva Green" + }, + "reservationFor": { + "@type": "Flight", + "flightNumber": "110", + "airline": { + "@type": "Airline", + "name": "United", + "iataCode": "UA" + }, + "departureAirport": { + "@type": "Airport", + "name": "San Francisco Airport", + "iataCode": "SFO" + }, + "departureTime": "2027-03-04T20:15:00-08:00", + "arrivalAirport": { + "@type": "Airport", + "name": "John F. Kennedy International Airport", + "iataCode": "JFK" + }, + "arrivalTime": "2027-03-05T06:30:00-05:00" + } +}] diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-microdata.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-microdata.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-microdata.html @@ -0,0 +1,25 @@ + +
+ + +
+ +
+
+ +
+ + +
+
+ + +
+ +
+ + +
+ +
+
diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-microdata.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-microdata.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-flight-reservation-microdata.json @@ -0,0 +1,32 @@ +[{ + "@context": "http://schema.org", + "@type": "FlightReservation", + "reservationNumber": "RXJ34P", + "reservationStatus": "http://schema.org/Confirmed", + "underName": { + "@type": "Person", + "name": "Eva Green" + }, + "reservationFor": { + "@type": "Flight", + "flightNumber": "110", + "airline": { + "@type": "Airline", + "name": "United", + "iataCode": "UA" + }, + "departureAirport": { + "@type": "Airport", + "name": "San Francisco Airport", + "iataCode": "SFO" + }, + "departureTime": "2027-03-04T20:15:00-08:00", + "arrivalAirport": { + "@type": "Airport", + "name": "John F. Kennedy International Airport", + "iataCode": "JFK" + }, + "arrivalTime": "2027-03-05T06:30:00-05:00" + } +}] + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-json-ld.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-json-ld.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-json-ld.html @@ -0,0 +1,44 @@ + + + + +

+ Dear John, thanks for booking your Google I/O ticket with us. +

+

+ BOOKING DETAILS
+ Reservation number: IO12345
+ Order for: John Smith
+ Event: Google I/O 2013
+ Start time: May 15th 2013 8:00am PST
+ Venue: Moscone Center, 800 Howard St., San Francisco, CA 94103
+

+ + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-json-ld.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-json-ld.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-json-ld.json @@ -0,0 +1,26 @@ +[{ + "@context": "http://schema.org", + "@type": "EventReservation", + "reservationNumber": "IO12345", + "underName": { + "@type": "Person", + "name": "John Smith" + }, + "reservationFor": { + "@type": "Event", + "name": "Google I/O 2013", + "startDate": "2013-05-15T08:30:00-08:00", + "location": { + "@type": "Place", + "name": "Moscone Center", + "address": { + "@type": "PostalAddress", + "streetAddress": "800 Howard St.", + "addressLocality": "San Francisco", + "addressRegion": "CA", + "postalCode": "94103", + "addressCountry": "US" + } + } + } +}] diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata-inline.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata-inline.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata-inline.html @@ -0,0 +1,29 @@ + + + +

+ Dear John, thanks for booking your Google I/O ticket with us. +

+

+ BOOKING DETAILS
+ Reservation number: IO12345
+ Order for: + John Smith +
+

+ Event: Google I/O 2013
+
+ Venue: + Moscone Center + + 800 Howard St., + San Francisco, + CA, + 94103, + US + + +
+

+ + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata-inline.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata-inline.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata-inline.json @@ -0,0 +1,27 @@ +[{ + "@context": "http://schema.org", + "@type": "EventReservation", + "reservationNumber": "IO12345", + "underName": { + "@type": "Person", + "name": "John Smith" + }, + "reservationFor": { + "@type": "Event", + "name": "Google I/O 2013", + "startDate": "2013-05-15T08:30:00-08:00", + "location": { + "@type": "Place", + "name": "Moscone Center", + "address": { + "@type": "PostalAddress", + "streetAddress": "800 Howard St.", + "addressLocality": "San Francisco", + "addressRegion": "CA", + "postalCode": "94103", + "addressCountry": "US" + } + } + } +}] + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata.html @@ -0,0 +1,36 @@ + + + +
+ +
+ +
+
+ +
+
+

+ Dear John, thanks for booking your Google I/O ticket with us. +

+

+ BOOKING DETAILS
+ Reservation number: IO12345
+ Order for: John Smith
+ Event: Google I/O 2013
+ Start time: May 15th 2013 8:00am PST
+ Venue: Moscone Center, 800 Howard St., San Francisco, CA 94103
+

+ + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/google-microdata.json @@ -0,0 +1,26 @@ +[{ + "@context": "http://schema.org", + "@type": "EventReservation", + "reservationNumber": "IO12345", + "underName": { + "@type": "Person", + "name": "John Smith" + }, + "reservationFor": { + "@type": "Event", + "name": "Google I/O 2013", + "startDate": "2013-05-15T08:30:00-08:00", + "location": { + "@type": "Place", + "name": "Moscone Center", + "address": { + "@type": "PostalAddress", + "streetAddress": "800 Howard St.", + "addressLocality": "San Francisco", + "addressRegion": "CA", + "postalCode": "94103", + "addressCountry": "US" + } + } + } +}] diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/hotel-json-ld-fallback.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/hotel-json-ld-fallback.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/hotel-json-ld-fallback.html @@ -0,0 +1,36 @@ + + + + Reservation Confirmation + + + +

random content
can be invalid + + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/hotel-json-ld-fallback.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/hotel-json-ld-fallback.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/hotel-json-ld-fallback.json @@ -0,0 +1,25 @@ +[{ + "@context": "http://schema.org", + "@type": "LodgingReservation", + "checkinDate": "2017-06-15T14:00:00+0:00", + "checkoutDate": "2017-06-18T11:00:00+0:00", + "reservationFor": { + "@type": "LodgingBusiness", + "address": { + "@type": "PostalAddress", + "addressCountry": "GB", + "addressLocality": "London", + "addressRegion": "LONDON", + "postalCode": "123 ABC", + "streetAddress": "10 Downing Street" + }, + "name": "Parser & Breaking Hotels", + "telephone": "+44-123-4-567-890" + }, + "reservationNumber": "KDEKDEKDE", + "reservationStatus": "http://schema.org/Confirmed", + "underName": { + "@type": "Person", + "name": "Volker Krause" + } +}] diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.html b/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.html @@ -0,0 +1,116 @@ + + + + + + +

+ +
+ +
+
+ +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+
+
+ +
+ +
+
+ +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+
+
+ +
+ +
+
+ +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+
+
+ +
+ +
+
+ +
+ + +
+
+ + +
+ +
+ + +
+ +
+
+ +
+
+ + + +lots of actual content
can be invalid to parse + + diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.json new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.json @@ -0,0 +1,136 @@ +[{ + "@context": "http://schema.org", + "@type": "FlightReservation", + "airplaneSeatClass": { + "@type": "AirplaneSeatClass", + "name": "ECONOMY" + }, + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "LH", + "name": "LUFTHANSA" + }, + "arrivalAirport": { + "@type": "Airport", + "iataCode": "MUC", + "name": "MUNICH DE MUNICH INTERNATIONAL" + }, + "arrivalTime": "2017-04-07T09:35:00+02:00", + "departureAirport": { + "@type": "Airport", + "iataCode": "TXL", + "name": "BERLIN DE TEGEL" + }, + "departureTime": "2017-04-07T08:30:00+02:00", + "flightNumber": "2029" + }, + "reservationNumber": "KDEKDE", + "underName": { + "@type": "Person", + "name": "VOLKER KRAUSE" + } +}, +{ + "@context": "http://schema.org", + "@type": "FlightReservation", + "airplaneSeatClass": { + "@type": "AirplaneSeatClass", + "name": "ECONOMY" + }, + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "LH", + "name": "LUFTHANSA" + }, + "arrivalAirport": { + "@type": "Airport", + "iataCode": "TLS", + "name": "TOULOUSE FR BLAGNAC" + }, + "arrivalTime": "2017-04-07T13:00:00+02:00", + "departureAirport": { + "@type": "Airport", + "iataCode": "MUC", + "name": "MUNICH DE MUNICH INTERNATIONAL" + }, + "departureTime": "2017-04-07T11:10:00+02:00", + "flightNumber": "2218" + }, + "reservationNumber": "KDEKDE", + "underName": { + "@type": "Person", + "name": "VOLKER KRAUSE" + } +}, +{ + "@context": "http://schema.org", + "@type": "FlightReservation", + "airplaneSeatClass": { + "@type": "AirplaneSeatClass", + "name": "ECONOMY" + }, + "reservationFor": { + 
"@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "LH", + "name": "LUFTHANSA" + }, + "arrivalAirport": { + "@type": "Airport", + "iataCode": "MUC", + "name": "MUNICH DE MUNICH INTERNATIONAL" + }, + "arrivalTime": "2017-04-09T19:55:00+02:00", + "departureAirport": { + "@type": "Airport", + "iataCode": "TLS", + "name": "TOULOUSE FR BLAGNAC" + }, + "departureTime": "2017-04-09T18:15:00+02:00", + "flightNumber": "2221" + }, + "reservationNumber": "KDEKDE", + "underName": { + "@type": "Person", + "name": "VOLKER KRAUSE" + } +}, +{ + "@context": "http://schema.org", + "@type": "FlightReservation", + "airplaneSeatClass": { + "@type": "AirplaneSeatClass", + "name": "ECONOMY" + }, + "reservationFor": { + "@type": "Flight", + "airline": { + "@type": "Airline", + "iataCode": "LH", + "name": "LUFTHANSA" + }, + "arrivalAirport": { + "@type": "Airport", + "iataCode": "TXL", + "name": "BERLIN DE TEGEL" + }, + "arrivalTime": "2017-04-09T22:35:00+02:00", + "departureAirport": { + "@type": "Airport", + "iataCode": "MUC", + "name": "MUNICH DE MUNICH INTERNATIONAL" + }, + "departureTime": "2017-04-09T21:30:00+02:00", + "flightNumber": "2054" + }, + "reservationNumber": "KDEKDE", + "underName": { + "@type": "Person", + "name": "VOLKER KRAUSE" + } +}] diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddataextractortest.cpp b/plugins/messageviewer/bodypartformatter/autotests/structureddataextractortest.cpp new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/autotests/structureddataextractortest.cpp @@ -0,0 +1,72 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. 
+ + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ + +#include "../semantic/structureddataextractor.cpp" + +#include +#include +#include +#include +#include + +class StructuredDataExtractorTest : public QObject +{ + Q_OBJECT +private Q_SLOTS: + void testExtract_data() + { + QTest::addColumn("inputFile"); + QTest::addColumn("jsonFile"); + + QDir dir(QStringLiteral(SOURCE_DIR "/structureddata")); + const auto lst = dir.entryList(QStringList(QStringLiteral("*.html")), QDir::Files | QDir::Readable | QDir::NoSymLinks); + for (const auto &file : lst) { + const auto refFile = dir.path() + QLatin1Char('/') + file.left(file.size() - 5) + QStringLiteral(".json"); + if (!QFile::exists(refFile)) { + qDebug() << "reference file" << refFile << "does not exist, skipping test file" << file; + continue; + } + QTest::newRow(file.toLatin1()) << QString(dir.path() + QLatin1Char('/') + file) << refFile; + } + } + + void testExtract() + { + QFETCH(QString, inputFile); + QFETCH(QString, jsonFile); + + StructuredDataExtractor extractor; + QFile f(inputFile); + QVERIFY(f.open(QFile::ReadOnly)); + extractor.parse(QString::fromUtf8(f.readAll())); + + QFile ref(jsonFile); + QVERIFY(ref.open(QFile::ReadOnly)); + const auto doc = QJsonDocument::fromJson(ref.readAll()); + QVERIFY(doc.isArray()); + + if (extractor.data() != doc.array()) + qDebug().noquote() << QJsonDocument(extractor.data()).toJson(); + QCOMPARE(extractor.data(), doc.array()); + } +}; + +QTEST_APPLESS_MAIN(StructuredDataExtractorTest) + +#include "structureddataextractortest.moc" diff 
--git a/plugins/messageviewer/bodypartformatter/semantic/CMakeLists.txt b/plugins/messageviewer/bodypartformatter/semantic/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/CMakeLists.txt @@ -0,0 +1,18 @@ +set(semantic_srcs + datatypes.cpp + jsonlddocument.cpp + semantic_plugin.cpp + semanticmemento.cpp + semanticprocessor.cpp + semanticrenderer.cpp + structureddataextractor.cpp +) +qt5_add_resources(semantic_srcs templates.qrc) +ecm_qt_declare_logging_category(semantic_srcs HEADER semantic_debug.h IDENTIFIER SEMANTIC_LOG CATEGORY_NAME org.kde.pim.messageviewer.semantic) + +add_library(messageviewer_bodypartformatter_semantic MODULE ${semantic_srcs}) +target_link_libraries(messageviewer_bodypartformatter_semantic + KF5::MessageViewer + Grantlee5::Templates +) +install(TARGETS messageviewer_bodypartformatter_semantic DESTINATION ${KDE_INSTALL_PLUGINDIR}/messageviewer/bodypartformatter) diff --git a/plugins/messageviewer/bodypartformatter/semantic/datatypes.h b/plugins/messageviewer/bodypartformatter/semantic/datatypes.h new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/datatypes.h @@ -0,0 +1,125 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. 
If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ + +#ifndef DATATYPES_H +#define DATATYPES_H + +#include +#include +#include + +#define SEMANTIC_GADGET \ + Q_GADGET \ + Q_PROPERTY(QString className READ className CONSTANT) \ + inline QString className() const { return QString::fromUtf8(staticMetaObject.className()); } + +#define SEMANTIC_PROPERTY(Type, Name) \ + Q_PROPERTY(Type Name MEMBER m_##Name) \ + Type m_##Name; + +/** @file + * The classes in here could possibly be auto-generated from the ontology defined by http://schema.org... + */ + +class Airport +{ + SEMANTIC_GADGET + SEMANTIC_PROPERTY(QString, name) + SEMANTIC_PROPERTY(QString, iataCode) +public: + bool operator!=(const Airport &other) const; +}; + +class Airline { + SEMANTIC_GADGET + SEMANTIC_PROPERTY(QString, name) + SEMANTIC_PROPERTY(QString, iataCode) +public: + bool operator!=(const Airline &other) const; +}; + +class Flight +{ + SEMANTIC_GADGET + SEMANTIC_PROPERTY(QString, flightNumber) + SEMANTIC_PROPERTY(Airline, airline) + SEMANTIC_PROPERTY(Airport, departureAirport) + SEMANTIC_PROPERTY(QDateTime, departureTime) + SEMANTIC_PROPERTY(Airport, arrivalAirport) + SEMANTIC_PROPERTY(QDateTime, arrivalTime) + + Q_PROPERTY(QString departureTimeLocalized READ departureTimeLocalized CONSTANT) + Q_PROPERTY(QString arrivalTimeLocalized READ arrivalTimeLocalized CONSTANT) +private: + QString departureTimeLocalized() const; + QString arrivalTimeLocalized() const; +}; + +class PostalAddress +{ + SEMANTIC_GADGET + SEMANTIC_PROPERTY(QString, streetAddress) + SEMANTIC_PROPERTY(QString, addressLocality) + SEMANTIC_PROPERTY(QString, postalCode) + SEMANTIC_PROPERTY(QString, addressCountry) +}; + +class LodgingBusiness +{ + SEMANTIC_GADGET + SEMANTIC_PROPERTY(QString, name) + SEMANTIC_PROPERTY(QVariant, address) +}; + +class Reservation +{ + Q_GADGET + SEMANTIC_PROPERTY(QString, reservationNumber) + SEMANTIC_PROPERTY(QVariant, reservationFor) 
+}; + +class LodgingReservation : protected Reservation +{ + SEMANTIC_GADGET + SEMANTIC_PROPERTY(QDateTime, checkinDate) + SEMANTIC_PROPERTY(QDateTime, checkoutDate) + + Q_PROPERTY(QString checkinDateLocalized READ checkinDateLocalized CONSTANT) + Q_PROPERTY(QString checkoutDateLocalized READ checkoutDateLocalized CONSTANT) +private: + QString checkinDateLocalized() const; + QString checkoutDateLocalized() const; +}; + +class FlightReservation : protected Reservation +{ + SEMANTIC_GADGET +}; + +Q_DECLARE_METATYPE(Airport) +Q_DECLARE_METATYPE(Airline) +Q_DECLARE_METATYPE(Flight) +Q_DECLARE_METATYPE(LodgingBusiness) +Q_DECLARE_METATYPE(LodgingReservation) +Q_DECLARE_METATYPE(FlightReservation) +Q_DECLARE_METATYPE(PostalAddress) + +#undef SEMANTIC_GADGET + +#endif // DATATYPES_H diff --git a/plugins/messageviewer/bodypartformatter/semantic/datatypes.cpp b/plugins/messageviewer/bodypartformatter/semantic/datatypes.cpp new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/datatypes.cpp @@ -0,0 +1,61 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+*/ + +#include "datatypes.h" + +#include +#include + +bool Airport::operator!=(const Airport &other) const +{ + return m_iataCode != other.m_iataCode && m_name != other.m_name; +} + +bool Airline::operator!=(const Airline &other) const +{ + return m_iataCode != other.m_iataCode && m_name != other.m_name; +} + +static QString localizedDateTime(const QDateTime &dt) +{ + auto s = QLocale().toString(dt, QLocale::ShortFormat); + if (dt.timeSpec() == Qt::TimeZone || dt.timeSpec() == Qt::OffsetFromUTC) + s += QLatin1Char(' ') + dt.timeZone().abbreviation(dt); + return s; +} + +QString Flight::departureTimeLocalized() const +{ + return localizedDateTime(m_departureTime); +} + +QString Flight::arrivalTimeLocalized() const +{ + return localizedDateTime(m_arrivalTime); +} + +QString LodgingReservation::checkinDateLocalized() const +{ + return QLocale().toString(m_checkinDate.date(), QLocale::ShortFormat); +} + +QString LodgingReservation::checkoutDateLocalized() const +{ + return QLocale().toString(m_checkoutDate.date(), QLocale::ShortFormat); +} diff --git a/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.h b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.h new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. 
+ + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ + +#ifndef JSONLDDOCUMENT_H +#define JSONLDDOCUMENT_H + +#include + +class QJsonArray; + +/** Serialization/deserialization code for JSON-LD data. */ +namespace JsonLdDocument +{ +QVariantList fromJson(const QJsonArray &array); +} + +#endif // JSONLDDOCUMENT_H diff --git a/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.cpp b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.cpp new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.cpp @@ -0,0 +1,108 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ + +#include "jsonlddocument.h" +#include "datatypes.h" +#include "semantic_debug.h" + +#include +#include +#include +#include +#include + +static QVariant createInstance(const QJsonObject &obj); + +// Eurowings workarounds... 
+static const char* fallbackDateTimePattern[] = { + "yyyy-MM-dd HH:mm:ss", + "yyyy-MM-dd HH:mm", + "MM-dd-yyyy HH:mm" // yes, seriously ;( +}; +static const auto fallbackDateTimePatternCount = sizeof(fallbackDateTimePattern) / sizeof(const char*); + +static QVariant propertyValue(const QMetaProperty &prop, const QJsonValue &v) +{ + switch (prop.type()) { + case QVariant::String: + return v.toString(); + case QVariant::DateTime: + { + auto str = v.toString(); + auto dt = QDateTime::fromString(str, Qt::ISODate); + for (unsigned int i = 0; i < fallbackDateTimePatternCount && dt.isNull(); ++i) { + dt = QDateTime::fromString(str, QString::fromLatin1(fallbackDateTimePattern[i])); + } + if (dt.isNull()) + qCDebug(SEMANTIC_LOG) << "Datetime parsing failed for" << str; + return dt; + } + default: + break; + } + return createInstance(v.toObject()); +} + +template +static QVariant createInstance(const QJsonObject &obj) +{ + T t; + for (auto it = obj.begin(); it != obj.end(); ++it) { + if (it.key().startsWith(QLatin1Char('@'))) + continue; + const auto idx = T::staticMetaObject.indexOfProperty(it.key().toLatin1()); + if (idx < 0) { + qCDebug(SEMANTIC_LOG) << "property" << it.key() << "could not be set on object of type" << T::staticMetaObject.className(); + continue; + } + const auto prop = T::staticMetaObject.property(idx); + const auto value = propertyValue(prop, it.value()); + prop.writeOnGadget(&t, value); + } + return QVariant::fromValue(t); +} + +#define MAKE_FACTORY(Class) \ + if (type == QLatin1String(#Class)) \ + return createInstance(obj) + +static QVariant createInstance(const QJsonObject &obj) +{ + const auto type = obj.value(QLatin1String("@type")).toString(); + MAKE_FACTORY(Airline); + MAKE_FACTORY(Airport); + MAKE_FACTORY(FlightReservation); + MAKE_FACTORY(Flight); + MAKE_FACTORY(LodgingBusiness); + MAKE_FACTORY(LodgingReservation); + MAKE_FACTORY(PostalAddress); + return {}; +} + +#undef MAKE_FACTORY + +QVariantList JsonLdDocument::fromJson(const QJsonArray 
&array) +{ + QVariantList l; + for (const auto &obj : array) { + const auto v = createInstance(obj.toObject()); + if (!v.isNull()) + l.push_back(v); + } return l; +} diff --git a/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.cpp b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.cpp new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.cpp @@ -0,0 +1,58 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+*/
+
+#include "semanticprocessor.h"
+#include "semanticrenderer.h"
+
+#include
+#include
+// Plugin entry point: provides one body part formatter and one renderer (both at index 0) and no URL handlers.
+namespace {
+class SemanticPlugin : public QObject, public MimeTreeParser::Interface::BodyPartFormatterPlugin, public MessageViewer::MessagePartRenderPlugin
+{
+    Q_OBJECT
+    Q_INTERFACES(MimeTreeParser::Interface::BodyPartFormatterPlugin)
+    Q_INTERFACES(MessageViewer::MessagePartRenderPlugin)
+    Q_PLUGIN_METADATA(IID "com.kde.messageviewer.bodypartformatter" FILE "semantic_plugin.json")
+public:
+    SemanticPlugin() = default;
+    // Index 0 yields a newly allocated SemanticProcessor, any other index nullptr. NOTE(review): ownership of the result is not visible here - confirm against the plugin interface.
+    const MimeTreeParser::Interface::BodyPartFormatter *bodyPartFormatter(int idx) const override
+    {
+        if (idx == 0)
+            return new SemanticProcessor();
+        return nullptr;
+    }
+    // Index 0 yields a newly allocated SemanticRenderer, any other index nullptr.
+    MessageViewer::MessagePartRendererBase *renderer(int idx) override
+    {
+        if (idx == 0)
+            return new SemanticRenderer();
+        return nullptr;
+    }
+    // No URL handler is provided for any index.
+    const MessageViewer::Interface::BodyPartURLHandler *urlHandler(int idx) const override
+    {
+        Q_UNUSED(idx);
+        return nullptr;
+    }
+};
+}
+
+#include "semantic_plugin.moc"
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.json b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.json
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.json
@@ -0,0 +1,8 @@
+{
+ "formatter": [
+ { "mimetype": "text/html" }
+ ],
+ "renderer": [
+ { "type": "MimeTreeParser::MessagePartList" }
+ ]
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.h b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+ + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ + +#ifndef SEMANTICMEMENTO_H +#define SEMANTICMEMENTO_H + +#include + +#include + +/** Memento holding the semantic information extracted for an email. */ +class SemanticMemento : public MimeTreeParser::Interface::BodyPartMemento +{ +public: + ~SemanticMemento(); + void detach() override; + bool isEmpty() const; + + QJsonArray data() const; + void setData(const QJsonArray &data); + +private: + QJsonArray m_data; +}; + +#endif // SEMANTICMEMENTO_H diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.cpp b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.cpp new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.cpp @@ -0,0 +1,41 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. 
 If not, write to the
+    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301, USA.
+*/
+
+#include "semanticmemento.h"
+
+SemanticMemento::~SemanticMemento() = default;
+// Empty implementation of the detach() override.
+void SemanticMemento::detach()
+{
+}
+// True when no structured data is stored, i.e. the JSON array has no elements.
+bool SemanticMemento::isEmpty() const
+{
+    return m_data.isEmpty();
+}
+// Accessor for the stored JSON array (returned by value).
+QJsonArray SemanticMemento::data() const
+{
+    return m_data;
+}
+// Replaces the stored JSON array with @p data.
+void SemanticMemento::setData(const QJsonArray &data)
+{
+    m_data = data;
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.h b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.h
@@ -0,0 +1,34 @@
+/*
+    Copyright (c) 2017 Volker Krause
+
+    This library is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Library General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    This library is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+    License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB. If not, write to the
+    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301, USA.
+*/
+
+#ifndef SEMANTICPROCESSOR_H
+#define SEMANTICPROCESSOR_H
+
+#include
+#include
+#include
+
+/** Processor plugin for MimeTreeParser.
 */
+class SemanticProcessor : public MimeTreeParser::Interface::BodyPartFormatter
+{
+public:
+    MimeTreeParser::MessagePart::Ptr process(MimeTreeParser::Interface::BodyPart &part) const override; ///< Extracts structured data from @p part into a SemanticMemento; every return path in semanticprocessor.cpp yields an empty pointer.
+};
+
+#endif // SEMANTICPROCESSOR_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.cpp b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.cpp
@@ -0,0 +1,44 @@
+/*
+    Copyright (c) 2017 Volker Krause
+
+    This library is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Library General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    This library is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+    License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB. If not, write to the
+    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301, USA.
+*/ + +#include "semanticprocessor.h" +#include "structureddataextractor.h" +#include "semanticmemento.h" +#include "semantic_debug.h" + +#include + +MimeTreeParser::MessagePart::Ptr SemanticProcessor::process(MimeTreeParser::Interface::BodyPart &part) const +{ + auto nodeHelper = part.nodeHelper(); + if (!nodeHelper) + return {}; + auto memento = dynamic_cast(nodeHelper->bodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData")); + if (memento) + return {}; + + qCDebug(SEMANTIC_LOG) << "-------------------------------------------- BEGIN SEMANTIC PARSING"; + StructuredDataExtractor extractor; + extractor.parse(part.content()->decodedText()); + memento = new SemanticMemento; + nodeHelper->setBodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData", memento); + memento->setData(extractor.data()); + qCDebug(SEMANTIC_LOG) << "-------------------------------------------- END SEMANTIC PARSING"; + return {}; +} diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.h b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.h new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. 
 If not, write to the
+    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301, USA.
+*/
+
+#ifndef SEMANTICRENDERER_H
+#define SEMANTICRENDERER_H
+
+#include
+
+/** Rendering plugin for semantic information about the email content. */
+class SemanticRenderer : public MessageViewer::MessagePartRendererBase
+{
+public:
+    SemanticRenderer(); ///< Registers the data types used by the templates with Grantlee (see semanticrenderer.cpp).
+    bool render(const MimeTreeParser::MessagePartPtr &msgPart, MimeTreeParser::HtmlWriter *htmlWriter, MessageViewer::RenderContext *context) const override; ///< Writes the semantic summary to @p htmlWriter; every return path in semanticrenderer.cpp yields false.
+};
+
+#endif // SEMANTICRENDERER_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.cpp b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.cpp
@@ -0,0 +1,102 @@
+/*
+    Copyright (c) 2017 Volker Krause
+
+    This library is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Library General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or (at your
+    option) any later version.
+
+    This library is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+    License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB. If not, write to the
+    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301, USA.
+*/ + +#include "semanticrenderer.h" +#include "datatypes.h" +#include "jsonlddocument.h" +#include "semanticmemento.h" +#include "semantic_debug.h" + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +// Grantlee has no Q_GADGET support yet +#define GRANTLEE_MAKE_GADGET(Class) \ + GRANTLEE_BEGIN_LOOKUP(Class) \ + const auto idx = Class::staticMetaObject.indexOfProperty(property.toUtf8().constData()); \ + if (idx < 0) \ + return {}; \ + const auto mp = Class::staticMetaObject.property(idx); \ + return mp.readOnGadget(&object); \ + GRANTLEE_END_LOOKUP + +GRANTLEE_MAKE_GADGET(Airport) +GRANTLEE_MAKE_GADGET(Airline) +GRANTLEE_MAKE_GADGET(Flight) +GRANTLEE_MAKE_GADGET(FlightReservation) +GRANTLEE_MAKE_GADGET(LodgingBusiness) +GRANTLEE_MAKE_GADGET(LodgingReservation) +GRANTLEE_MAKE_GADGET(PostalAddress) + +SemanticRenderer::SemanticRenderer() +{ + Grantlee::registerMetaType(); + Grantlee::registerMetaType(); + Grantlee::registerMetaType(); + Grantlee::registerMetaType(); + Grantlee::registerMetaType(); + Grantlee::registerMetaType(); + Grantlee::registerMetaType(); +} + +bool SemanticRenderer::render(const MimeTreeParser::MessagePartPtr &msgPart, MimeTreeParser::HtmlWriter *htmlWriter, MessageViewer::RenderContext *context) const +{ + Q_UNUSED(context); + const auto mpList = msgPart.dynamicCast(); + if (!msgPart->isRoot() || !mpList->hasSubParts()) + return false; + + qCDebug(SEMANTIC_LOG) << "========================================= Semantic Rendering"; + const auto node = mpList->subParts().at(0)->content(); + const auto nodeHelper = msgPart->nodeHelper(); + if (!nodeHelper || !node) + return false; + + auto memento = dynamic_cast(nodeHelper->bodyPartMemento(node->topLevel(), "org.kde.messageviewer.semanticData")); + if (!memento || memento->isEmpty()) + return false; + + const auto decodedData = JsonLdDocument::fromJson(memento->data()); + if (decodedData.isEmpty()) { + qCDebug(SEMANTIC_LOG) << "Unhandled content:" << 
QJsonDocument(memento->data()).toJson(); + return false; + } + + auto c = MessageViewer::MessagePartRendererManager::self()->createContext(); + c.insert(QStringLiteral("data"), decodedData); + + const auto pal = qGuiApp->palette(); + QVariantMap style; + style.insert(QStringLiteral("frameColor"), pal.link().color().name()); + c.insert(QStringLiteral("style"), style); + + auto t = MessageViewer::MessagePartRendererManager::self()->loadByName(QStringLiteral(":/org.kde.messageviewer/semantic/semantic.html")); + Grantlee::OutputStream s(htmlWriter->stream()); + t->render(&s, &c); + return false; // yes, false, we want the rest of the email rendered normally after this +} diff --git a/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h @@ -0,0 +1,53 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. +*/ + +#ifndef STRUCTUREDDATAEXTRACTOR_H +#define STRUCTUREDDATAEXTRACTOR_H + +#include + +class QJsonObject; +class QString; +class QXmlStreamReader; + +/** Extract schema.org structured data from HTML text. 
+ * @see https://developers.google.com/gmail/markup/getting-started + */ +class StructuredDataExtractor +{ +public: + void parse(const QString &text); + QJsonArray data() const { return m_data; } + +private: + /** Try to parse using an actual XML parser. */ + void parseXml(const QString &text); + /** Try to find application/ld+json content with basic string search. */ + void findLdJson(const QString &text); + /** Try to fix some common HTML4 damage to make @p text consumable for parseXml(). */ + QString fixupHtml4(const QString &text) const; + /** Recursive microdata parsing. */ + QJsonObject parseMicroData(QXmlStreamReader &reader) const; + /** Element-dependent Microdata property value. */ + QString valueForItemProperty(QXmlStreamReader &reader) const; + + QJsonArray m_data; +}; + +#endif // STRUCTUREDDATAEXTRACTOR_H diff --git a/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.cpp b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.cpp new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.cpp @@ -0,0 +1,174 @@ +/* + Copyright (c) 2017 Volker Krause + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at your + option) any later version. + + This library is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. 
+*/ + +#include "structureddataextractor.h" +#include "semantic_debug.h" + +#include +#include +#include +#include +#include +#include +#include + +void StructuredDataExtractor::parse(const QString &text) +{ + parseXml(text); + if (m_data.isEmpty()) { + findLdJson(text); + if (m_data.isEmpty()) { + parseXml(fixupHtml4(text)); + } + } +} + +void StructuredDataExtractor::parseXml(const QString &text) +{ + QXmlStreamReader reader(text); + while (!reader.atEnd()) { + if (reader.tokenType() == QXmlStreamReader::StartElement) { + // JSON-LD + if (reader.name() == QLatin1String("script") && reader.attributes().value(QLatin1String("type")) == QLatin1String("application/ld+json")) { + const auto jsonData = reader.readElementText(QXmlStreamReader::IncludeChildElements); + const auto jsonDoc = QJsonDocument::fromJson(jsonData.toUtf8()); + if (jsonDoc.isNull()) + continue; + if (jsonDoc.isArray()) + m_data.append(jsonDoc.array()); + else if (jsonDoc.isObject()) + m_data.push_back(jsonDoc.object()); + } + + // Microdata + const auto itemType = reader.attributes().value(QLatin1String("itemtype")).toString(); + if (itemType.startsWith(QLatin1String("http://schema.org/"))) { + auto obj = parseMicroData(reader); + if (obj.isEmpty()) + continue; + obj.insert(QStringLiteral("@context"), QStringLiteral("http://schema.org")); + const QUrl typeUrl(itemType); + obj.insert(QStringLiteral("@type"), typeUrl.fileName()); + m_data.push_back(obj); + continue; + } + + } + reader.readNext(); + } + + if (reader.hasError()) + qCDebug(SEMANTIC_LOG) << reader.errorString() << reader.lineNumber() << reader.columnNumber(); +} + +void StructuredDataExtractor::findLdJson(const QString &text) +{ + for (int i = 0; i < text.size();) { + i = text.indexOf(QLatin1String("'), i) + 1; + if (i < 0) + break; + i = text.indexOf(QLatin1String(""), begin, Qt::CaseInsensitive); + const auto jsonData = text.mid(begin, i - begin); + auto jsonDoc = QJsonDocument::fromJson(jsonData.toUtf8()); + if (jsonDoc.isNull()) + 
continue; + if (jsonDoc.isArray()) + m_data.append(jsonDoc.array()); + else if (jsonDoc.isObject()) + m_data.push_back(jsonDoc.object()); + } +} + +QString StructuredDataExtractor::fixupHtml4(const QString &text) const +{ + auto output(text); + + // close single-element tags + output.replace(QRegularExpression(QStringLiteral("(]*[^>/])>")), QStringLiteral("\\1/>")); + + // fix value-less attributes + output.replace(QRegularExpression(QStringLiteral("(<[^>]+ )itemscope( [^>]*>)")), QStringLiteral("\\1itemscope=\"\"\\2")); + + // TODO remove legacy entities like   + return output; +} + +QJsonObject StructuredDataExtractor::parseMicroData(QXmlStreamReader &reader) const +{ + QJsonObject obj; + reader.readNext(); + int depth = 1; + + while (!reader.atEnd()) { + if (reader.tokenType() == QXmlStreamReader::StartElement) { + ++depth; + const auto prop = reader.attributes().value(QLatin1String("itemprop")).toString(); + const auto type = reader.attributes().value(QLatin1String("itemtype")).toString(); + if (type.startsWith(QLatin1String("http://schema.org/"))) { + auto subObj = parseMicroData(reader); + const QUrl typeUrl(type); + subObj.insert(QStringLiteral("@type"), typeUrl.fileName()); + + obj.insert(prop, subObj); + continue; + } + if (!prop.isEmpty()) { + obj.insert(prop, valueForItemProperty(reader)); + continue; + } + + } else if (reader.tokenType() == QXmlStreamReader::EndElement) { + --depth; + if (depth == 0) + return obj; + } + reader.readNext(); + } + + if (reader.hasError()) + qCDebug(SEMANTIC_LOG) << reader.errorString() << reader.lineNumber() << reader.columnNumber(); + return {}; +} + +QString StructuredDataExtractor::valueForItemProperty(QXmlStreamReader &reader) const +{ + // TODO see https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/itemprop#Values + const auto elemName = reader.name(); + if (elemName == QLatin1String("span")) + return reader.readElementText(QXmlStreamReader::IncludeChildElements); + + QString v; + if (elemName == 
QLatin1String("meta")) + v = reader.attributes().value(QLatin1String("content")).toString(); + else if (elemName == QLatin1String("time")) + v = reader.attributes().value(QLatin1String("datetime")).toString(); + else if (elemName == QLatin1String("link") || elemName == QLatin1String("a")) + v = reader.attributes().value(QLatin1String("href")).toString(); + else + qCDebug(SEMANTIC_LOG) << "TODO:" << elemName; + + reader.readNext(); + return v; +} diff --git a/plugins/messageviewer/bodypartformatter/semantic/templates.qrc b/plugins/messageviewer/bodypartformatter/semantic/templates.qrc new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/templates.qrc @@ -0,0 +1,7 @@ + + + templates/semantic.html + templates/flightreservation.html + templates/lodgingreservation.html + + diff --git a/plugins/messageviewer/bodypartformatter/semantic/templates/flightreservation.html b/plugins/messageviewer/bodypartformatter/semantic/templates/flightreservation.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/templates/flightreservation.html @@ -0,0 +1,12 @@ + + + + + + + + + + + +
{{ item.reservationFor.departureAirport.iataCode }}{{ item.reservationFor.arrivalAirport.iataCode }}
{{ item.reservationFor.departureTimeLocalized }}{{ item.reservationFor.airline.iataCode }} {{ item.reservationFor.flightNumber }}{{ item.reservationFor.arrivalTimeLocalized }}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/templates/lodgingreservation.html b/plugins/messageviewer/bodypartformatter/semantic/templates/lodgingreservation.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/templates/lodgingreservation.html @@ -0,0 +1,4 @@ +
{{ item.reservationFor.name }} {{ item.checkinDateLocalized }} - {{ item.checkoutDateLocalized }}
+
{{ item.reservationFor.address.streetAddress }}
+
{{ item.reservationFor.address.postalCode }} {{ item.reservationFor.address.addressLocality }}
+
{{ item.reservationFor.address.addressCountry }}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/templates/semantic.html b/plugins/messageviewer/bodypartformatter/semantic/templates/semantic.html new file mode 100644 --- /dev/null +++ b/plugins/messageviewer/bodypartformatter/semantic/templates/semantic.html @@ -0,0 +1,12 @@ +
+{% for item in data %} + {% if item.className == "FlightReservation" %} + {% include ":/org.kde.messageviewer/semantic/flightreservation.html" %} + {% elif item.className == "LodgingReservation" %} + {% include ":/org.kde.messageviewer/semantic/lodgingreservation.html" %} + {% endif %} + {% if not forloop.last %} +
+ {% endif %} +{% endfor %} +