diff --git a/src/extractors/CMakeLists.txt b/src/extractors/CMakeLists.txt
--- a/src/extractors/CMakeLists.txt
+++ b/src/extractors/CMakeLists.txt
@@ -105,6 +105,31 @@
 TARGETS kfilemetadata_poextractor
 DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)
 
+#
+# XML
+#
+# Plugin module built from the XML extractor and the Dublin Core extractor sources.
+add_library(kfilemetadata_xmlextractor MODULE
+    dublincoreextractor.cpp
+    xmlextractor.cpp
+    ../kfilemetadata_debug.cpp
+)
+target_link_libraries(kfilemetadata_xmlextractor
+    KF5::FileMetaData
+    Qt5::Core
+    Qt5::Xml
+)
+
+# Place the built module under <build dir>/bin/kf5/kfilemetadata.
+set_target_properties(kfilemetadata_xmlextractor
+    PROPERTIES
+        LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata"
+)
+install(
+    TARGETS kfilemetadata_xmlextractor
+    DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata
+)
+
 #
 # ODF
 #
diff --git a/src/extractors/xmlextractor.h b/src/extractors/xmlextractor.h
new file mode 100644
--- /dev/null
+++ b/src/extractors/xmlextractor.h
@@ -0,0 +1,45 @@
+/*
+    Copyright (C) 2018 Stefan Brüns
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef XML_EXTRACTOR_H
+#define XML_EXTRACTOR_H
+
+#include "extractorplugin.h"
+
+namespace KFileMetaData
+{
+
+/// Extractor plugin for "application/xml", "image/svg+xml" and "image/svg" files.
+class XmlExtractor : public ExtractorPlugin
+{
+    Q_OBJECT
+    Q_PLUGIN_METADATA(IID "org.kde.kf5.kfilemetadata.ExtractorPlugin")
+    Q_INTERFACES(KFileMetaData::ExtractorPlugin)
+
+public:
+    explicit XmlExtractor(QObject* parent = nullptr);
+
+    /// Returns the list of mimetypes this plugin handles.
+    QStringList mimetypes() const override;
+    /// Parses the input file of @p result and reports its type and contents.
+    void extract(ExtractionResult* result) override;
+};
+
+} // namespace KFileMetaData
+
+#endif // XML_EXTRACTOR_H
diff --git a/src/extractors/xmlextractor.cpp b/src/extractors/xmlextractor.cpp
new file mode 100644
--- /dev/null
+++ b/src/extractors/xmlextractor.cpp
@@ -0,0 +1,135 @@
+/*
+    Copyright (C) 2018 Stefan Brüns
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "xmlextractor.h"
+#include "kfilemetadata_debug.h"
+#include "dublincoreextractor.h"
+
+// NOTE(review): the operands of the three includes below were missing from the
+// reviewed text; they are restored from the Qt classes this file uses - confirm.
+#include <QDomDocument>
+#include <QFile>
+#include <QStringList>
+
+namespace {
+// XML namespace URIs compared against QDomNode::namespaceURI() below.
+inline QString dcNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
+inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
+inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
+inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
+
+/**
+ * Appends the text content of the SVG element @p node to @p result.
+ * "g" elements are descended into recursively, "text" elements contribute their
+ * text; elements outside the SVG namespace and all other elements are ignored.
+ */
+void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
+{
+    if (node.namespaceURI() != svgNS()) {
+        return;
+    }
+    const QString name = node.localName();
+    if (name == QLatin1String("g")) {
+        for (QDomElement e = node.firstChildElement(); !e.isNull(); e = e.nextSiblingElement()) {
+            extractSvgText(result, e);
+        }
+    } else if (name == QLatin1String("text")) {
+        const QString text = node.text(); // read once for both the log and the result
+        qCDebug(KFILEMETADATA_LOG) << text;
+        result->append(text);
+    }
+}
+
+// Mimetypes returned by XmlExtractor::mimetypes().
+const QStringList supportedMimeTypes = {
+    QStringLiteral("application/xml"),
+    QStringLiteral("image/svg+xml"),
+    QStringLiteral("image/svg"),
+};
+
+} // namespace
+
+namespace KFileMetaData
+{
+XmlExtractor::XmlExtractor(QObject* parent)
+    : ExtractorPlugin(parent)
+{
+}
+
+QStringList XmlExtractor::mimetypes() const
+{
+    return supportedMimeTypes;
+}
+
+/**
+ * Parses the input file as XML and reports its type and contents to @p result.
+ * SVG mimetypes: adds Type::Image, passes metadata/RDF/Work to DublinCoreExtractor
+ * and, if ExtractPlainText is set, collects text from all other children but "defs".
+ * Other mimetypes: adds Type::Text and, if ExtractPlainText is set, the root text.
+ */
+void XmlExtractor::extract(ExtractionResult* result)
+{
+    const auto flags = result->inputFlags();
+    QFile file(result->inputUrl());
+    if (!file.open(QIODevice::ReadOnly)) {
+        qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
+        return;
+    }
+
+    QDomDocument doc;
+    const bool processNamespaces = true;
+    QString parseError;
+    if (!doc.setContent(&file, processNamespaces, &parseError)) {
+        // Best effort: keep going so the type is still reported, as before.
+        qCWarning(KFILEMETADATA_LOG) << "Failed to parse XML document:" << parseError;
+    }
+
+    const auto mimetype = result->inputMimetype();
+    if (mimetype != QLatin1String("image/svg") && mimetype != QLatin1String("image/svg+xml")) {
+        result->addType(Type::Text);
+        if (flags & ExtractionResult::ExtractPlainText) {
+            result->append(doc.firstChildElement().text());
+        }
+        return;
+    }
+
+    result->addType(Type::Image);
+    const QDomElement svg = doc.firstChildElement();
+    if (svg.isNull() || svg.localName() != QLatin1String("svg") || svg.namespaceURI() != svgNS()) {
+        return;
+    }
+
+    for (QDomElement e = svg.firstChildElement(); !e.isNull(); e = e.nextSiblingElement()) {
+        if (e.namespaceURI() != svgNS()) {
+            continue;
+        }
+        const QString name = e.localName();
+        if (name == QLatin1String("metadata")) {
+            const auto rdf = e.firstChildElement(QLatin1String("RDF"));
+            auto cc = rdf.firstChildElement(QLatin1String("Work"));
+            if (rdf.namespaceURI() == rdfNS() && !cc.isNull() && cc.namespaceURI() == ccNS()) {
+                DublinCoreExtractor::extract(result, cc);
+            }
+        } else if (name != QLatin1String("defs") && (flags & ExtractionResult::ExtractPlainText)) {
+            // "defs" children are skipped; text is collected only when requested.
+            extractSvgText(result, e);
+        }
+    }
+}
+
+} // namespace KFileMetaData