Differential D8583 Diff 22184 plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h
Changeset View
Standalone View
plugins/messageviewer/bodypartformatter/semantic/structureddataextractor.h
- This file was added.
1 | /* | ||||
---|---|---|---|---|---|
2 | Copyright (c) 2017 Volker Krause <vkrause@kde.org> | ||||
3 | | ||||
4 | This library is free software; you can redistribute it and/or modify it | ||||
5 | under the terms of the GNU Library General Public License as published by | ||||
6 | the Free Software Foundation; either version 2 of the License, or (at your | ||||
7 | option) any later version. | ||||
8 | | ||||
9 | This library is distributed in the hope that it will be useful, but WITHOUT | ||||
10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||||
11 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | ||||
12 | License for more details. | ||||
13 | | ||||
14 | You should have received a copy of the GNU Library General Public License | ||||
15 | along with this library; see the file COPYING.LIB. If not, write to the | ||||
16 | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||||
17 | 02110-1301, USA. | ||||
18 | */ | ||||
19 | | ||||
20 | #ifndef STRUCTUREDDATAEXTRACTOR_H | ||||
21 | #define STRUCTUREDDATAEXTRACTOR_H | ||||
22 | | ||||
23 | #include <QJsonArray> | ||||
24 | | ||||
25 | class QJsonObject; | ||||
26 | class QString; | ||||
27 | class QXmlStreamReader; | ||||
28 | | ||||
29 | /** Extract schema.org structured data from HTML text. | ||||
30 | * @see https://developers.google.com/gmail/markup/getting-started | ||||
31 | */ | ||||
32 | class StructuredDataExtractor | ||||
33 | { | ||||
34 | public: | ||||
35 | void parse(const QString &text); /**< Public entry point taking the text to process. Presumably fills the array returned by data(); the implementation is not visible in this header - TODO confirm in the .cpp, including whether repeated calls reset or append. */ | ||||
36 | QJsonArray data() const { return m_data; } /**< Returns m_data by value. */ | ||||
37 | | ||||
38 | private: | ||||
39 | /** Try to parse using an actual XML parser. */ | ||||
40 | void parseXml(const QString &text); | ||||
41 | /** Try to find application/ld+json content with basic string search. */ | ||||
42 | void findLdJson(const QString &text); | ||||
43 | /** Try to fix some common HTML4 damage to make @p text consumable for parseXml(). */ | ||||
44 | QString fixupHtml4(const QString &text) const; | ||||
45 | /** Recursive microdata parsing. */ | ||||
46 | QJsonObject parseMicroData(QXmlStreamReader &reader) const; | ||||
47 | /** Element-dependent Microdata property value. */ | ||||
48 | QString valueForItemProperty(QXmlStreamReader &reader) const; | ||||
49 | | ||||
50 | QJsonArray m_data; /**< JSON array handed out by data(). */ | ||||
51 | }; | ||||
52 | | ||||
53 | #endif // STRUCTUREDDATAEXTRACTOR_H |