+ Dear John, thanks for booking your Google I/O ticket with us.
+
+
+ BOOKING DETAILS
+ Reservation number: IO12345
+ Order for: John Smith
+ Event: Google I/O 2013
+ Start time: May 15th 2013 8:00am PST
+ Venue: Moscone Center, 800 Howard St., San Francisco, CA 94103
+
+ Dear John, thanks for booking your Google I/O ticket with us.
+
+
+ BOOKING DETAILS
+ Reservation number: IO12345
+ Order for: John Smith
+ Event: Google I/O 2013
+ Start time: May 15th 2013 8:00am PST
+ Venue: Moscone Center, 800 Howard St., San Francisco, CA 94103
+
+
+
+
+lots of actual content can be invalid to parse
+
+
diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.json b/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.json
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/autotests/structureddata/lh-invalid-microdata.json
@@ -0,0 +1,136 @@
+[{
+ "@context": "http://schema.org",
+ "@type": "FlightReservation",
+ "airplaneSeatClass": {
+ "@type": "AirplaneSeatClass",
+ "name": "ECONOMY"
+ },
+ "reservationFor": {
+ "@type": "Flight",
+ "airline": {
+ "@type": "Airline",
+ "iataCode": "LH",
+ "name": "LUFTHANSA"
+ },
+ "arrivalAirport": {
+ "@type": "Airport",
+ "iataCode": "MUC",
+ "name": "MUNICH DE MUNICH INTERNATIONAL"
+ },
+ "arrivalTime": "2017-04-07T09:35:00+02:00",
+ "departureAirport": {
+ "@type": "Airport",
+ "iataCode": "TXL",
+ "name": "BERLIN DE TEGEL"
+ },
+ "departureTime": "2017-04-07T08:30:00+02:00",
+ "flightNumber": "2029"
+ },
+ "reservationNumber": "KDEKDE",
+ "underName": {
+ "@type": "Person",
+ "name": "VOLKER KRAUSE"
+ }
+},
+{
+ "@context": "http://schema.org",
+ "@type": "FlightReservation",
+ "airplaneSeatClass": {
+ "@type": "AirplaneSeatClass",
+ "name": "ECONOMY"
+ },
+ "reservationFor": {
+ "@type": "Flight",
+ "airline": {
+ "@type": "Airline",
+ "iataCode": "LH",
+ "name": "LUFTHANSA"
+ },
+ "arrivalAirport": {
+ "@type": "Airport",
+ "iataCode": "TLS",
+ "name": "TOULOUSE FR BLAGNAC"
+ },
+ "arrivalTime": "2017-04-07T13:00:00+02:00",
+ "departureAirport": {
+ "@type": "Airport",
+ "iataCode": "MUC",
+ "name": "MUNICH DE MUNICH INTERNATIONAL"
+ },
+ "departureTime": "2017-04-07T11:10:00+02:00",
+ "flightNumber": "2218"
+ },
+ "reservationNumber": "KDEKDE",
+ "underName": {
+ "@type": "Person",
+ "name": "VOLKER KRAUSE"
+ }
+},
+{
+ "@context": "http://schema.org",
+ "@type": "FlightReservation",
+ "airplaneSeatClass": {
+ "@type": "AirplaneSeatClass",
+ "name": "ECONOMY"
+ },
+ "reservationFor": {
+ "@type": "Flight",
+ "airline": {
+ "@type": "Airline",
+ "iataCode": "LH",
+ "name": "LUFTHANSA"
+ },
+ "arrivalAirport": {
+ "@type": "Airport",
+ "iataCode": "MUC",
+ "name": "MUNICH DE MUNICH INTERNATIONAL"
+ },
+ "arrivalTime": "2017-04-09T19:55:00+02:00",
+ "departureAirport": {
+ "@type": "Airport",
+ "iataCode": "TLS",
+ "name": "TOULOUSE FR BLAGNAC"
+ },
+ "departureTime": "2017-04-09T18:15:00+02:00",
+ "flightNumber": "2221"
+ },
+ "reservationNumber": "KDEKDE",
+ "underName": {
+ "@type": "Person",
+ "name": "VOLKER KRAUSE"
+ }
+},
+{
+ "@context": "http://schema.org",
+ "@type": "FlightReservation",
+ "airplaneSeatClass": {
+ "@type": "AirplaneSeatClass",
+ "name": "ECONOMY"
+ },
+ "reservationFor": {
+ "@type": "Flight",
+ "airline": {
+ "@type": "Airline",
+ "iataCode": "LH",
+ "name": "LUFTHANSA"
+ },
+ "arrivalAirport": {
+ "@type": "Airport",
+ "iataCode": "TXL",
+ "name": "BERLIN DE TEGEL"
+ },
+ "arrivalTime": "2017-04-09T22:35:00+02:00",
+ "departureAirport": {
+ "@type": "Airport",
+ "iataCode": "MUC",
+ "name": "MUNICH DE MUNICH INTERNATIONAL"
+ },
+ "departureTime": "2017-04-09T21:30:00+02:00",
+ "flightNumber": "2054"
+ },
+ "reservationNumber": "KDEKDE",
+ "underName": {
+ "@type": "Person",
+ "name": "VOLKER KRAUSE"
+ }
+}]
diff --git a/plugins/messageviewer/bodypartformatter/autotests/structureddataextractortest.cpp b/plugins/messageviewer/bodypartformatter/autotests/structureddataextractortest.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/autotests/structureddataextractortest.cpp
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "../semantic/structureddataextractor.cpp"
+
+#include
+#include
+#include
+#include
+#include
+
+class StructuredDataExtractorTest : public QObject // data-driven test: extracts structured data from each HTML fixture and compares it with a JSON reference
+{
+ Q_OBJECT
+private Q_SLOTS:
+ void testExtract_data() // builds one test row per *.html file in SOURCE_DIR/structureddata that has a sibling .json file
+ {
+ QTest::addColumn("inputFile");
+ QTest::addColumn("jsonFile");
+ // Only regular, readable, non-symlink *.html entries are listed.
+ QDir dir(QStringLiteral(SOURCE_DIR "/structureddata"));
+ const auto lst = dir.entryList(QStringList(QStringLiteral("*.html")), QDir::Files | QDir::Readable | QDir::NoSymLinks);
+ for (const auto &file : lst) {
+ const auto refFile = dir.path() + QLatin1Char('/') + file.left(file.size() - 5) + QStringLiteral(".json"); // size() - 5 drops the ".html" suffix
+ if (!QFile::exists(refFile)) {
+ qDebug() << "reference file" << refFile << "does not exist, skipping test file" << file;
+ continue;
+ }
+ QTest::newRow(file.toLatin1()) << QString(dir.path() + QLatin1Char('/') + file) << refFile; // row is named after the input file
+ }
+ }
+
+ void testExtract() // parses inputFile and requires the extracted array to equal the array stored in jsonFile
+ {
+ QFETCH(QString, inputFile);
+ QFETCH(QString, jsonFile);
+
+ StructuredDataExtractor extractor;
+ QFile f(inputFile);
+ QVERIFY(f.open(QFile::ReadOnly));
+ extractor.parse(QString::fromUtf8(f.readAll())); // fixture content is decoded as UTF-8
+
+ QFile ref(jsonFile);
+ QVERIFY(ref.open(QFile::ReadOnly));
+ const auto doc = QJsonDocument::fromJson(ref.readAll());
+ QVERIFY(doc.isArray()); // the reference must be a top-level JSON array
+
+ if (extractor.data() != doc.array()) // on mismatch, dump the actual result before QCOMPARE fails
+ qDebug().noquote() << QJsonDocument(extractor.data()).toJson();
+ QCOMPARE(extractor.data(), doc.array());
+ }
+};
+
+QTEST_APPLESS_MAIN(StructuredDataExtractorTest)
+
+#include "structureddataextractortest.moc"
diff --git a/plugins/messageviewer/bodypartformatter/semantic/CMakeLists.txt b/plugins/messageviewer/bodypartformatter/semantic/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/CMakeLists.txt
@@ -0,0 +1,18 @@
+set(semantic_srcs # sources compiled into the plugin module
+ datatypes.cpp
+ jsonlddocument.cpp
+ semantic_plugin.cpp
+ semanticmemento.cpp
+ semanticprocessor.cpp
+ semanticrenderer.cpp
+ structureddataextractor.cpp
+)
+qt5_add_resources(semantic_srcs templates.qrc) # appends the compiled templates.qrc resource to the source list
+ecm_qt_declare_logging_category(semantic_srcs HEADER semantic_debug.h IDENTIFIER SEMANTIC_LOG CATEGORY_NAME org.kde.pim.messageviewer.semantic) # declares the SEMANTIC_LOG category in semantic_debug.h
+# The plugin is built as a MODULE library and installed into the messageviewer/bodypartformatter plugin directory.
+add_library(messageviewer_bodypartformatter_semantic MODULE ${semantic_srcs})
+target_link_libraries(messageviewer_bodypartformatter_semantic
+ KF5::MessageViewer
+ Grantlee5::Templates
+)
+install(TARGETS messageviewer_bodypartformatter_semantic DESTINATION ${KDE_INSTALL_PLUGINDIR}/messageviewer/bodypartformatter)
diff --git a/plugins/messageviewer/bodypartformatter/semantic/datatypes.h b/plugins/messageviewer/bodypartformatter/semantic/datatypes.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/datatypes.h
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#ifndef DATATYPES_H
+#define DATATYPES_H
+
+#include
+#include
+#include
+
+#define SEMANTIC_GADGET \
+ Q_GADGET \
+ Q_PROPERTY(QString className READ className CONSTANT) \
+ inline QString className() const { return QString::fromUtf8(staticMetaObject.className()); } // SEMANTIC_GADGET: Q_GADGET plus a constant "className" property taken from the meta object
+// SEMANTIC_PROPERTY(Type, Name): declares the member m_<Name> and exposes it as the Q_PROPERTY <Name>.
+#define SEMANTIC_PROPERTY(Type, Name) \
+ Q_PROPERTY(Type Name MEMBER m_##Name) \
+ Type m_##Name;
+
+/** @file
+ * The classes in here could possibly be auto-generated from the ontology defined by http://schema.org...
+ */
+
+class Airport // gadget describing an airport by name and IATA code
+{
+ SEMANTIC_GADGET
+ SEMANTIC_PROPERTY(QString, name)
+ SEMANTIC_PROPERTY(QString, iataCode)
+public:
+ bool operator!=(const Airport &other) const; // inequality over the stored fields; implemented in datatypes.cpp
+};
+
+class Airline { // gadget describing an airline by name and IATA code
+ SEMANTIC_GADGET
+ SEMANTIC_PROPERTY(QString, name)
+ SEMANTIC_PROPERTY(QString, iataCode)
+public:
+ bool operator!=(const Airline &other) const; // inequality over the stored fields; implemented in datatypes.cpp
+};
+
+class Flight // gadget describing one flight: number, airline, and departure/arrival airport and time
+{
+ SEMANTIC_GADGET
+ SEMANTIC_PROPERTY(QString, flightNumber)
+ SEMANTIC_PROPERTY(Airline, airline)
+ SEMANTIC_PROPERTY(Airport, departureAirport)
+ SEMANTIC_PROPERTY(QDateTime, departureTime)
+ SEMANTIC_PROPERTY(Airport, arrivalAirport)
+ SEMANTIC_PROPERTY(QDateTime, arrivalTime)
+ // Read-only string properties backed by the private getters below (implemented in datatypes.cpp).
+ Q_PROPERTY(QString departureTimeLocalized READ departureTimeLocalized CONSTANT)
+ Q_PROPERTY(QString arrivalTimeLocalized READ arrivalTimeLocalized CONSTANT)
+private:
+ QString departureTimeLocalized() const;
+ QString arrivalTimeLocalized() const;
+};
+
+class PostalAddress // gadget holding the textual parts of a postal address
+{
+ SEMANTIC_GADGET
+ SEMANTIC_PROPERTY(QString, streetAddress)
+ SEMANTIC_PROPERTY(QString, addressLocality)
+ SEMANTIC_PROPERTY(QString, postalCode)
+ SEMANTIC_PROPERTY(QString, addressCountry)
+};
+
+class LodgingBusiness // gadget describing a lodging business by name and address
+{
+ SEMANTIC_GADGET
+ SEMANTIC_PROPERTY(QString, name)
+ SEMANTIC_PROPERTY(QVariant, address) // held as QVariant; presumably a PostalAddress — confirm against the producers
+};
+
+class Reservation // common base of the reservation gadgets below
+{
+ Q_GADGET // plain Q_GADGET: unlike SEMANTIC_GADGET this adds no className property
+ SEMANTIC_PROPERTY(QString, reservationNumber)
+ SEMANTIC_PROPERTY(QVariant, reservationFor) // held as QVariant, so the reserved item's type is not fixed here
+};
+
+class LodgingReservation : protected Reservation // reservation gadget adding check-in and check-out dates
+{
+ SEMANTIC_GADGET
+ SEMANTIC_PROPERTY(QDateTime, checkinDate)
+ SEMANTIC_PROPERTY(QDateTime, checkoutDate)
+ // Read-only string properties backed by the private getters below (implemented in datatypes.cpp).
+ Q_PROPERTY(QString checkinDateLocalized READ checkinDateLocalized CONSTANT)
+ Q_PROPERTY(QString checkoutDateLocalized READ checkoutDateLocalized CONSTANT)
+private:
+ QString checkinDateLocalized() const;
+ QString checkoutDateLocalized() const;
+};
+
+class FlightReservation : protected Reservation // reservation gadget that declares no properties of its own beyond className
+{
+ SEMANTIC_GADGET
+};
+
+Q_DECLARE_METATYPE(Airport) // the gadget types are declared as metatypes so they can be stored in QVariant
+Q_DECLARE_METATYPE(Airline)
+Q_DECLARE_METATYPE(Flight)
+Q_DECLARE_METATYPE(LodgingBusiness)
+Q_DECLARE_METATYPE(LodgingReservation)
+Q_DECLARE_METATYPE(FlightReservation)
+Q_DECLARE_METATYPE(PostalAddress)
+// SEMANTIC_GADGET is not needed past this point; note that SEMANTIC_PROPERTY stays defined.
+#undef SEMANTIC_GADGET
+
+#endif // DATATYPES_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/datatypes.cpp b/plugins/messageviewer/bodypartformatter/semantic/datatypes.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/datatypes.cpp
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "datatypes.h"
+
+#include
+#include
+
+bool Airport::operator!=(const Airport &other) const // returns true when the IATA code or the name differs from other
+{
+ return m_iataCode != other.m_iataCode || m_name != other.m_name; // one differing field is enough; requiring both hid airports that only carry a code
+}
+
+bool Airline::operator!=(const Airline &other) const // returns true when the IATA code or the name differs from other
+{
+ return m_iataCode != other.m_iataCode || m_name != other.m_name; // one differing field is enough; requiring both hid airlines that only carry a code
+}
+
+static QString localizedDateTime(const QDateTime &dt) // short-format text in the default locale, plus the zone abbreviation when dt carries a zone or UTC offset
+{
+ const auto spec = dt.timeSpec();
+ auto text = QLocale().toString(dt, QLocale::ShortFormat);
+ if (spec == Qt::TimeZone || spec == Qt::OffsetFromUTC) { text += QLatin1Char(' ') + dt.timeZone().abbreviation(dt); }
+ return text;
+}
+
+QString Flight::departureTimeLocalized() const // getter behind the departureTimeLocalized property
+{
+ return localizedDateTime(m_departureTime);
+}
+
+QString Flight::arrivalTimeLocalized() const // getter behind the arrivalTimeLocalized property
+{
+ return localizedDateTime(m_arrivalTime);
+}
+
+QString LodgingReservation::checkinDateLocalized() const // getter behind the checkinDateLocalized property
+{
+ return QLocale().toString(m_checkinDate.date(), QLocale::ShortFormat); // only the date part is formatted; the time of day is dropped
+}
+
+QString LodgingReservation::checkoutDateLocalized() const // getter behind the checkoutDateLocalized property
+{
+ return QLocale().toString(m_checkoutDate.date(), QLocale::ShortFormat); // only the date part is formatted; the time of day is dropped
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.h b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#ifndef JSONLDDOCUMENT_H
+#define JSONLDDOCUMENT_H
+
+#include
+
+class QJsonArray;
+
+/** Serialization/deserialization code for JSON-LD data. */
+namespace JsonLdDocument
+{
+QVariantList fromJson(const QJsonArray &array); // decodes the objects in array into QVariant-wrapped values; implemented in jsonlddocument.cpp
+}
+
+#endif // JSONLDDOCUMENT_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.cpp b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/jsonlddocument.cpp
@@ -0,0 +1,108 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "jsonlddocument.h"
+#include "datatypes.h"
+#include "semantic_debug.h"
+
+#include
+#include
+#include
+#include
+#include
+
+static QVariant createInstance(const QJsonObject &obj);
+
+// Eurowings workarounds: non-ISO date/time patterns, tried in this order once ISO date parsing has produced nothing.
+static const char* fallbackDateTimePattern[] = {
+ "yyyy-MM-dd HH:mm:ss",
+ "yyyy-MM-dd HH:mm",
+ "MM-dd-yyyy HH:mm" // yes, seriously ;(
+};
+static const auto fallbackDateTimePatternCount = sizeof(fallbackDateTimePattern) / sizeof(const char*); // number of entries in the table above
+
+static QVariant propertyValue(const QMetaProperty &prop, const QJsonValue &v)
+{
+ // Converts the JSON value v into a QVariant matching the declared type of prop; strings are taken verbatim.
+ const auto kind = prop.type();
+ if (kind == QVariant::String) {
+ return v.toString();
+ }
+ // Anything that is neither a string nor a date/time is decoded as a nested typed object.
+ if (kind != QVariant::DateTime) {
+ return createInstance(v.toObject());
+ }
+ // Date/time: ISO format first, then each fallback pattern in order until one yields a non-null value.
+ const auto text = v.toString();
+ auto parsed = QDateTime::fromString(text, Qt::ISODate);
+ unsigned int next = 0;
+ while (next < fallbackDateTimePatternCount && parsed.isNull()) {
+ parsed = QDateTime::fromString(text, QString::fromLatin1(fallbackDateTimePattern[next++]));
+ }
+ if (parsed.isNull()) { qCDebug(SEMANTIC_LOG) << "Datetime parsing failed for" << text; }
+ return parsed;
+}
+
+template
+static QVariant createInstance(const QJsonObject &obj) // builds a T and fills its properties from the members of obj
+{
+ T t;
+ for (auto it = obj.begin(); it != obj.end(); ++it) {
+ if (it.key().startsWith(QLatin1Char('@'))) // keys starting with '@' (such as "@type") are not properties
+ continue;
+ const auto idx = T::staticMetaObject.indexOfProperty(it.key().toLatin1());
+ if (idx < 0) { // T has no property of that name: log and ignore the member
+ qCDebug(SEMANTIC_LOG) << "property" << it.key() << "could not be set on object of type" << T::staticMetaObject.className();
+ continue;
+ }
+ const auto prop = T::staticMetaObject.property(idx);
+ const auto value = propertyValue(prop, it.value()); // convert according to the declared property type
+ prop.writeOnGadget(&t, value);
+ }
+ return QVariant::fromValue(t);
+}
+
+#define MAKE_FACTORY(Class) \
+ if (type == QLatin1String(#Class)) \
+ return createInstance(obj)
+// Dispatches on the "@type" member of obj; an unrecognised type yields a default-constructed QVariant.
+static QVariant createInstance(const QJsonObject &obj)
+{
+ const auto type = obj.value(QLatin1String("@type")).toString();
+ MAKE_FACTORY(Airline); // each line compares type with the class name and returns on a match
+ MAKE_FACTORY(Airport);
+ MAKE_FACTORY(FlightReservation);
+ MAKE_FACTORY(Flight);
+ MAKE_FACTORY(LodgingBusiness);
+ MAKE_FACTORY(LodgingReservation);
+ MAKE_FACTORY(PostalAddress);
+ return {};
+}
+
+#undef MAKE_FACTORY
+
+QVariantList JsonLdDocument::fromJson(const QJsonArray &array) // decodes each array element; elements whose decoding yields a null variant are left out
+{
+ QVariantList decoded;
+ for (auto it = array.begin(); it != array.end(); ++it) {
+ const QVariant instance = createInstance((*it).toObject());
+ if (!instance.isNull()) { decoded.append(instance); }
+ }
+ return decoded;
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.cpp b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.cpp
@@ -0,0 +1,58 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "semanticprocessor.h"
+#include "semanticrenderer.h"
+
+#include
+#include
+
+namespace {
+class SemanticPlugin : public QObject, public MimeTreeParser::Interface::BodyPartFormatterPlugin, public MessageViewer::MessagePartRenderPlugin
+{
+ Q_OBJECT
+ Q_INTERFACES(MimeTreeParser::Interface::BodyPartFormatterPlugin)
+ Q_INTERFACES(MessageViewer::MessagePartRenderPlugin)
+ Q_PLUGIN_METADATA(IID "com.kde.messageviewer.bodypartformatter" FILE "semantic_plugin.json")
+public:
+ SemanticPlugin() = default;
+ // Formatter factory: only index 0 is populated, any other index yields nullptr.
+ const MimeTreeParser::Interface::BodyPartFormatter *bodyPartFormatter(int idx) const override
+ {
+ const bool firstSlot = (idx == 0);
+ return firstSlot ? new SemanticProcessor() : nullptr;
+ }
+
+ // Renderer factory: same single-slot scheme as the formatter.
+ MessageViewer::MessagePartRendererBase *renderer(int idx) override
+ {
+ const bool firstSlot = (idx == 0);
+ return firstSlot ? new SemanticRenderer() : nullptr;
+ }
+
+ // No URL handler is offered, whatever the index.
+ const MessageViewer::Interface::BodyPartURLHandler *urlHandler(int idx) const override
+ {
+ Q_UNUSED(idx);
+ return nullptr;
+ }
+};
+}
+
+#include "semantic_plugin.moc"
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.json b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.json
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semantic_plugin.json
@@ -0,0 +1,8 @@
+{
+ "formatter": [
+ { "mimetype": "text/html" }
+ ],
+ "renderer": [
+ { "type": "MimeTreeParser::MessagePartList" }
+ ]
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.h b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#ifndef SEMANTICMEMENTO_H
+#define SEMANTICMEMENTO_H
+
+#include
+
+#include
+
+/** Memento holding the semantic information extracted for an email. */
+class SemanticMemento : public MimeTreeParser::Interface::BodyPartMemento
+{
+public:
+ ~SemanticMemento();
+ void detach() override; // no-op in this class (see semanticmemento.cpp)
+ bool isEmpty() const; // true when no data has been stored
+ // Accessors for the extracted data, stored as a JSON array.
+ QJsonArray data() const;
+ void setData(const QJsonArray &data);
+
+private:
+ QJsonArray m_data;
+};
+
+#endif // SEMANTICMEMENTO_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.cpp b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticmemento.cpp
@@ -0,0 +1,41 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "semanticmemento.h"
+
+SemanticMemento::~SemanticMemento() = default; // out-of-line defaulted destructor
+
+void SemanticMemento::detach() // empty implementation of the BodyPartMemento override
+{
+}
+
+bool SemanticMemento::isEmpty() const // true when the stored JSON array has no elements
+{
+ return m_data.isEmpty();
+}
+
+QJsonArray SemanticMemento::data() const // returns a copy of the stored JSON array
+{
+ return m_data;
+}
+
+void SemanticMemento::setData(const QJsonArray &data) // replaces the stored JSON array with data
+{
+ m_data = data;
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.h b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#ifndef SEMANTICPROCESSOR_H
+#define SEMANTICPROCESSOR_H
+
+#include
+#include
+#include
+
+/** Processor plugin for MimeTreeParser. */
+class SemanticProcessor : public MimeTreeParser::Interface::BodyPartFormatter
+{
+public:
+ MimeTreeParser::MessagePart::Ptr process(MimeTreeParser::Interface::BodyPart &part) const override; // extracts structured data from part and stores it in a memento; implemented in semanticprocessor.cpp
+};
+
+#endif // SEMANTICPROCESSOR_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.cpp b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticprocessor.cpp
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "semanticprocessor.h"
+#include "structureddataextractor.h"
+#include "semanticmemento.h"
+#include "semantic_debug.h"
+
+#include
+
+MimeTreeParser::MessagePart::Ptr SemanticProcessor::process(MimeTreeParser::Interface::BodyPart &part) const
+{ // Always returns an empty Ptr; the only effect is the memento attached to the top-level content.
+ auto nodeHelper = part.nodeHelper();
+ if (!nodeHelper) // without a node helper there is nowhere to store the result
+ return {};
+ auto memento = dynamic_cast(nodeHelper->bodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData"));
+ if (memento) // a memento is already attached under this key, so nothing is extracted again
+ return {};
+ // First pass for this message: run the extractor on the decoded text of the part.
+ qCDebug(SEMANTIC_LOG) << "-------------------------------------------- BEGIN SEMANTIC PARSING";
+ StructuredDataExtractor extractor;
+ extractor.parse(part.content()->decodedText());
+ memento = new SemanticMemento;
+ nodeHelper->setBodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData", memento); // NOTE(review): presumably transfers ownership of memento to the node helper — confirm
+ memento->setData(extractor.data()); // stored even when empty; the renderer checks isEmpty()
+ qCDebug(SEMANTIC_LOG) << "-------------------------------------------- END SEMANTIC PARSING";
+ return {};
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.h b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#ifndef SEMANTICRENDERER_H
+#define SEMANTICRENDERER_H
+
+#include
+
+/** Rendering plugin for semantic information about the email content. */
+class SemanticRenderer : public MessageViewer::MessagePartRendererBase
+{
+public:
+ SemanticRenderer(); // performs the Grantlee meta type registrations (see semanticrenderer.cpp)
+ bool render(const MimeTreeParser::MessagePartPtr &msgPart, MimeTreeParser::HtmlWriter *htmlWriter, MessageViewer::RenderContext *context) const override; // writes the semantic summary, if any, to htmlWriter; context is unused
+};
+
+#endif // SEMANTICRENDERER_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.cpp b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/semanticrenderer.cpp
@@ -0,0 +1,102 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "semanticrenderer.h"
+#include "datatypes.h"
+#include "jsonlddocument.h"
+#include "semanticmemento.h"
+#include "semantic_debug.h"
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+// Grantlee has no Q_GADGET support yet, so property lookup is routed through the gadget's static meta object.
+#define GRANTLEE_MAKE_GADGET(Class) \
+ GRANTLEE_BEGIN_LOOKUP(Class) \
+ const auto idx = Class::staticMetaObject.indexOfProperty(property.toUtf8().constData()); \
+ if (idx < 0) \
+ return {}; \
+ const auto mp = Class::staticMetaObject.property(idx); \
+ return mp.readOnGadget(&object); \
+ GRANTLEE_END_LOOKUP
+// One lookup per gadget type; unknown property names yield an empty value.
+GRANTLEE_MAKE_GADGET(Airport)
+GRANTLEE_MAKE_GADGET(Airline)
+GRANTLEE_MAKE_GADGET(Flight)
+GRANTLEE_MAKE_GADGET(FlightReservation)
+GRANTLEE_MAKE_GADGET(LodgingBusiness)
+GRANTLEE_MAKE_GADGET(LodgingReservation)
+GRANTLEE_MAKE_GADGET(PostalAddress)
+
+SemanticRenderer::SemanticRenderer() // registers meta types with Grantlee, seven calls in total
+{
+ Grantlee::registerMetaType(); // NOTE(review): presumably one call per gadget type listed in the lookups above; the template arguments are not visible here — confirm
+ Grantlee::registerMetaType();
+ Grantlee::registerMetaType();
+ Grantlee::registerMetaType();
+ Grantlee::registerMetaType();
+ Grantlee::registerMetaType();
+ Grantlee::registerMetaType();
+}
+
+bool SemanticRenderer::render(const MimeTreeParser::MessagePartPtr &msgPart, MimeTreeParser::HtmlWriter *htmlWriter, MessageViewer::RenderContext *context) const
+{ // Writes the semantic summary for the message (if any) to htmlWriter; returns false in every case.
+ Q_UNUSED(context);
+ const auto mpList = msgPart.dynamicCast();
+ if (!mpList || !msgPart->isRoot() || !mpList->hasSubParts()) // the cast result is null when msgPart is not of the target type, so test it before use
+ return false;
+ // The memento is looked up via the content node of the first sub-part.
+ qCDebug(SEMANTIC_LOG) << "========================================= Semantic Rendering";
+ const auto node = mpList->subParts().at(0)->content();
+ const auto nodeHelper = msgPart->nodeHelper();
+ if (!nodeHelper || !node)
+ return false;
+ // Same key as used by the processor when it stored the extracted data.
+ auto memento = dynamic_cast(nodeHelper->bodyPartMemento(node->topLevel(), "org.kde.messageviewer.semanticData"));
+ if (!memento || memento->isEmpty())
+ return false;
+ // Turn the raw JSON into gadget instances; nothing is rendered if none is recognised.
+ const auto decodedData = JsonLdDocument::fromJson(memento->data());
+ if (decodedData.isEmpty()) {
+ qCDebug(SEMANTIC_LOG) << "Unhandled content:" << QJsonDocument(memento->data()).toJson();
+ return false;
+ }
+ // Template context: the decoded objects as "data" ...
+ auto c = MessageViewer::MessagePartRendererManager::self()->createContext();
+ c.insert(QStringLiteral("data"), decodedData);
+ // ... and a "style" map whose frameColor is the application palette's link colour.
+ const auto pal = qGuiApp->palette();
+ QVariantMap style;
+ style.insert(QStringLiteral("frameColor"), pal.link().color().name());
+ c.insert(QStringLiteral("style"), style);
+ // Render the bundled template straight into the writer's stream.
+ auto t = MessageViewer::MessagePartRendererManager::self()->loadByName(QStringLiteral(":/org.kde.messageviewer/semantic/semantic.html"));
+ Grantlee::OutputStream s(htmlWriter->stream());
+ t->render(&s, &c);
+ return false; // yes, false, we want the rest of the email rendered normally after this
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h
@@ -0,0 +1,53 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#ifndef STRUCTUREDDATAEXTRACTOR_H
+#define STRUCTUREDDATAEXTRACTOR_H
+
+#include
+
+class QJsonObject;
+class QString;
+class QXmlStreamReader;
+
+/** Extract schema.org structured data from HTML text.
+ * @see https://developers.google.com/gmail/markup/getting-started
+ */
+class StructuredDataExtractor
+{
+public:
+    /// Scan @p text and add every structured data object that is found to data().
+    void parse(const QString &text);
+    QJsonArray data() const { return m_data; } ///< Objects collected by parse() so far.
+private:
+    /// Recursively collect the Microdata properties below the current element.
+    QJsonObject parseMicroData(QXmlStreamReader &reader) const;
+    /// Value of a Microdata property, read in an element-dependent way.
+    QString valueForItemProperty(QXmlStreamReader &reader) const;
+    /// Run @p text through an actual XML parser.
+    void parseXml(const QString &text);
+    /// Locate application/ld+json content using basic string search.
+    void findLdJson(const QString &text);
+    /// Repair some common HTML4 damage so that parseXml() can consume @p text.
+    QString fixupHtml4(const QString &text) const;
+
+    QJsonArray m_data;
+};
+
+#endif // STRUCTUREDDATAEXTRACTOR_H
diff --git a/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.cpp b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.cpp
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.cpp
@@ -0,0 +1,174 @@
+/*
+ Copyright (c) 2017 Volker Krause
+
+ This library is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+*/
+
+#include "structureddataextractor.h"
+#include "semantic_debug.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void StructuredDataExtractor::parse(const QString &text)
+{
+    // Three attempts in order; a later one only runs while m_data is still empty.
+    parseXml(text);
+    if (!m_data.isEmpty())
+        return;
+    findLdJson(text);
+    if (m_data.isEmpty())
+        parseXml(fixupHtml4(text));
+}
+
+void StructuredDataExtractor::parseXml(const QString &text)
+{
+    // Walks the tokens of @p text and inspects each start tag for JSON-LD script content and schema.org Microdata.
+    QXmlStreamReader xml(text);
+    while (!xml.atEnd()) {
+        if (xml.tokenType() != QXmlStreamReader::StartElement) {
+            xml.readNext();
+            continue;
+        }
+        // JSON-LD
+        if (xml.name() == QLatin1String("script") && xml.attributes().value(QLatin1String("type")) == QLatin1String("application/ld+json")) {
+            const auto payload = xml.readElementText(QXmlStreamReader::IncludeChildElements);
+            const auto doc = QJsonDocument::fromJson(payload.toUtf8());
+            if (doc.isNull())
+                continue;
+            if (doc.isArray())
+                m_data.append(doc.array());
+            else if (doc.isObject())
+                m_data.push_back(doc.object());
+        }
+        // Microdata (checked on the reader's current token, also after a JSON-LD script was consumed above)
+        const auto itemType = xml.attributes().value(QLatin1String("itemtype")).toString();
+        if (!itemType.startsWith(QLatin1String("http://schema.org/"))) {
+            xml.readNext();
+            continue;
+        }
+        auto item = parseMicroData(xml); // advances the reader itself, so no readNext() on this path
+        if (!item.isEmpty()) {
+            item.insert(QStringLiteral("@context"), QStringLiteral("http://schema.org"));
+            item.insert(QStringLiteral("@type"), QUrl(itemType).fileName());
+            m_data.push_back(item);
+        }
+    }
+
+    if (xml.hasError())
+        qCDebug(SEMANTIC_LOG) << xml.errorString() << xml.lineNumber() << xml.columnNumber();
+}
+
+void StructuredDataExtractor::findLdJson(const QString &text) // string-search fallback: appends JSON documents cut out of @p text to m_data
+{
+ for (int i = 0; i < text.size();) {
+ i = text.indexOf(QLatin1String(""), begin, Qt::CaseInsensitive); // NOTE(review): `begin` is never declared in the visible text and the search string is empty - lines appear to be missing from this copy, confirm against the original
+ const auto jsonData = text.mid(begin, i - begin); // candidate JSON: the text from `begin` up to the match position
+ auto jsonDoc = QJsonDocument::fromJson(jsonData.toUtf8());
+ if (jsonDoc.isNull()) // not parseable as JSON, move on to the next loop iteration
+ continue;
+ if (jsonDoc.isArray())
+ m_data.append(jsonDoc.array());
+ else if (jsonDoc.isObject())
+ m_data.push_back(jsonDoc.object());
+ }
+}
+
+QString StructuredDataExtractor::fixupHtml4(const QString &text) const
+{
+    // NOTE(review): the first pattern looks truncated in this copy (it opens with "(]*") - confirm against the original.
+    const QRegularExpression unclosedTag(QStringLiteral("(]*[^>/])>"));
+    const QRegularExpression bareItemscope(QStringLiteral("(<[^>]+ )itemscope( [^>]*>)"));
+
+    QString fixed = text;
+    // close single-element tags
+    fixed.replace(unclosedTag, QStringLiteral("\\1/>"));
+    // fix value-less attributes
+    fixed.replace(bareItemscope, QStringLiteral("\\1itemscope=\"\"\\2"));
+    return fixed; // TODO remove legacy entities
+}
+
+QJsonObject StructuredDataExtractor::parseMicroData(QXmlStreamReader &reader) const
+{
+    // Called with @p reader on the item's start tag; returns once the end tags balance the start tags seen.
+    QJsonObject item;
+    int openElements = 1; // accounts for the item's own start tag
+    reader.readNext();
+    while (!reader.atEnd()) {
+        const auto token = reader.tokenType();
+        if (token == QXmlStreamReader::EndElement) {
+            if (--openElements == 0)
+                return item;
+        } else if (token == QXmlStreamReader::StartElement) {
+            ++openElements;
+            const auto attrs = reader.attributes();
+            const auto prop = attrs.value(QLatin1String("itemprop")).toString();
+            const auto type = attrs.value(QLatin1String("itemtype")).toString();
+            if (type.startsWith(QLatin1String("http://schema.org/"))) {
+                // nested item: recurse, then label the result with the last segment of its type URL
+                auto child = parseMicroData(reader);
+                child.insert(QStringLiteral("@type"), QUrl(type).fileName());
+                item.insert(prop, child);
+                continue; // the recursive call already moved the reader
+            }
+            if (!prop.isEmpty()) {
+                item.insert(prop, valueForItemProperty(reader));
+                continue; // valueForItemProperty() already moved the reader
+            }
+        }
+        reader.readNext();
+    }
+
+    // The input ended before the item was closed: report a reader error if there is one and return nothing.
+    if (reader.hasError())
+        qCDebug(SEMANTIC_LOG) << reader.errorString() << reader.lineNumber() << reader.columnNumber();
+    return {};
+}
+
+QString StructuredDataExtractor::valueForItemProperty(QXmlStreamReader &reader) const
+{
+    // TODO see https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/itemprop#Values
+    const auto tag = reader.name();
+    if (tag == QLatin1String("span")) // the value is the element's text
+        return reader.readElementText(QXmlStreamReader::IncludeChildElements);
+    // For the other handled elements the value is taken from an attribute; pick its name first.
+    const char *valueAttr = nullptr;
+    if (tag == QLatin1String("meta"))
+        valueAttr = "content";
+    else if (tag == QLatin1String("time"))
+        valueAttr = "datetime";
+    else if (tag == QLatin1String("link") || tag == QLatin1String("a"))
+        valueAttr = "href";
+    else
+        qCDebug(SEMANTIC_LOG) << "TODO:" << tag;
+    QString v = valueAttr ? reader.attributes().value(QLatin1String(valueAttr)).toString() : QString();
+    reader.readNext(); // step past the start tag; unhandled elements yield an empty value
+    return v;
+}
diff --git a/plugins/messageviewer/bodypartformatter/semantic/templates.qrc b/plugins/messageviewer/bodypartformatter/semantic/templates.qrc
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/templates.qrc
@@ -0,0 +1,7 @@
+
+
+ templates/semantic.html
+ templates/flightreservation.html
+ templates/lodgingreservation.html
+
+
diff --git a/plugins/messageviewer/bodypartformatter/semantic/templates/flightreservation.html b/plugins/messageviewer/bodypartformatter/semantic/templates/flightreservation.html
new file mode 100644
--- /dev/null
+++ b/plugins/messageviewer/bodypartformatter/semantic/templates/flightreservation.html
@@ -0,0 +1,12 @@
+