diff --git a/src/extractors/CMakeLists.txt b/src/extractors/CMakeLists.txt
index 97b8afc..017cfec 100644
--- a/src/extractors/CMakeLists.txt
+++ b/src/extractors/CMakeLists.txt
@@ -1,179 +1,199 @@
 if(Poppler_Qt5_FOUND)
     add_library(kfilemetadata_popplerextractor MODULE popplerextractor.cpp)

     target_link_libraries(kfilemetadata_popplerextractor
         KF5::FileMetaData
         Poppler::Qt5
     )

     set_target_properties(kfilemetadata_popplerextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_popplerextractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 if(TAGLIB_FOUND)
     add_library(kfilemetadata_taglibextractor MODULE taglibextractor.cpp )
     target_include_directories(kfilemetadata_taglibextractor SYSTEM PRIVATE ${TAGLIB_INCLUDES})

     target_link_libraries( kfilemetadata_taglibextractor
         KF5::FileMetaData
         ${TAGLIB_LIBRARIES}
     )

     set_target_properties(kfilemetadata_taglibextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_taglibextractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 if(EXIV2_FOUND)
     add_library(kfilemetadata_exiv2extractor MODULE exiv2extractor.cpp)
     target_include_directories(kfilemetadata_exiv2extractor SYSTEM PRIVATE ${EXIV2_INCLUDE_DIR})
     kde_target_enable_exceptions(kfilemetadata_exiv2extractor PRIVATE)

     target_link_libraries(kfilemetadata_exiv2extractor
         KF5::FileMetaData
         ${EXIV2_LIBRARIES}
     )

     set_target_properties(kfilemetadata_exiv2extractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_exiv2extractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 if(FFMPEG_FOUND)
     add_library(kfilemetadata_ffmpegextractor MODULE ffmpegextractor.cpp)
     target_include_directories(kfilemetadata_ffmpegextractor SYSTEM PRIVATE ${AVCODEC_INCLUDE_DIRS} ${AVFORMAT_INCLUDE_DIRS} ${AVUTIL_INCLUDE_DIRS})

     target_link_libraries(kfilemetadata_ffmpegextractor
         KF5::FileMetaData
         ${AVCODEC_LIBRARIES}
         ${AVFORMAT_LIBRARIES}
         ${AVUTIL_LIBRARIES}
     )

     set_target_properties(kfilemetadata_ffmpegextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_ffmpegextractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 if(EPUB_FOUND)
     add_library(kfilemetadata_epubextractor MODULE epubextractor.cpp)
     target_include_directories(kfilemetadata_epubextractor SYSTEM PRIVATE ${EPUB_INCLUDE_DIR})

     target_link_libraries(kfilemetadata_epubextractor
         KF5::FileMetaData
         ${EPUB_LIBRARIES}
     )

     set_target_properties(kfilemetadata_epubextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_epubextractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 #
 # Plain Text
 #
 add_library(kfilemetadata_plaintextextractor MODULE plaintextextractor.cpp)
 target_link_libraries( kfilemetadata_plaintextextractor
     KF5::FileMetaData
 )

 set_target_properties(kfilemetadata_plaintextextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
 install(
 TARGETS kfilemetadata_plaintextextractor
 DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 #
 # PO
 #
 add_library(kfilemetadata_poextractor MODULE poextractor.cpp)
 target_link_libraries( kfilemetadata_poextractor
     KF5::FileMetaData
 )

 set_target_properties(kfilemetadata_poextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
 install(
 TARGETS kfilemetadata_poextractor
 DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

+#
+# XML (generic XML and SVG documents)
+#
+add_library(kfilemetadata_xmlextractor MODULE
+    dublincoreextractor.cpp
+    xmlextractor.cpp
+    ../kfilemetadata_debug.cpp
+)
+target_link_libraries( kfilemetadata_xmlextractor
+    KF5::FileMetaData
+    Qt5::Core
+    Qt5::Xml
+)
+
+set_target_properties(kfilemetadata_xmlextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
+install(
+    TARGETS kfilemetadata_xmlextractor
+    DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata
+)
+
 #
 # ODF
 #
 if(KF5Archive_FOUND)
     add_library(kfilemetadata_odfextractor MODULE odfextractor.cpp)

     target_link_libraries(kfilemetadata_odfextractor
         KF5::FileMetaData
         Qt5::Core
         Qt5::Xml
         KF5::Archive
     )

     set_target_properties(kfilemetadata_odfextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_odfextractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 #
 # Office 2007
 #
 if(KF5Archive_FOUND)
     add_library(kfilemetadata_office2007extractor MODULE office2007extractor.cpp)

     target_link_libraries(kfilemetadata_office2007extractor
         KF5::FileMetaData
         Qt5::Core
         Qt5::Xml
         KF5::Archive
     )

     set_target_properties(kfilemetadata_office2007extractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_office2007extractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()

 #
 # Office (binary formats)
 #
 add_library(kfilemetadata_officeextractor MODULE officeextractor.cpp)
 target_link_libraries(kfilemetadata_officeextractor
     KF5::FileMetaData
 )

 set_target_properties(kfilemetadata_officeextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
 install(
 TARGETS kfilemetadata_officeextractor
 DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 #
 # Mobipocket
 #
 if (QMOBIPOCKET_FOUND)
     add_library(kfilemetadata_mobiextractor MODULE mobiextractor.cpp)
     target_include_directories(kfilemetadata_mobiextractor SYSTEM PRIVATE ${QMOBIPOCKET_INCLUDE_DIR})

     target_link_libraries(kfilemetadata_mobiextractor
         KF5::FileMetaData
         ${QMOBIPOCKET_LIBRARIES}
     )

     set_target_properties(kfilemetadata_mobiextractor PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/kf5/kfilemetadata")
     install(
     TARGETS kfilemetadata_mobiextractor
     DESTINATION ${PLUGIN_INSTALL_DIR}/kf5/kfilemetadata)

 endif()
diff --git a/src/extractors/xmlextractor.cpp
b/src/extractors/xmlextractor.cpp
new file mode 100644
index 0000000..198b599
--- /dev/null
+++ b/src/extractors/xmlextractor.cpp
@@ -0,0 +1,159 @@
+/*
+    Copyright (C) 2018 Stefan Brüns
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#include "xmlextractor.h"
+#include "kfilemetadata_debug.h"
+#include "dublincoreextractor.h"
+
+#include <QDomDocument>
+#include <QDebug>
+#include <QFile>
+
+namespace {
+
+// XML namespace URIs matched while walking an SVG document.
+inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); }
+inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
+inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); }
+
+/**
+ * Appends the text content of SVG "text" elements to @p result.
+ *
+ * Nodes outside the SVG namespace are ignored, "g" elements are searched
+ * recursively, and any other element is skipped.
+ */
+void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
+{
+    if (node.namespaceURI() != svgNS()) {
+        return;
+    }
+
+    if (node.localName() == QLatin1String("g")) {
+        QDomElement e = node.firstChildElement();
+        for (; !e.isNull(); e = e.nextSiblingElement()) {
+            extractSvgText(result, e);
+        }
+    } else if (node.localName() == QLatin1String("text")) {
+        // Read the text once and reuse it for the debug log and the result.
+        const QString text = node.text();
+        qCDebug(KFILEMETADATA_LOG) << text;
+        result->append(text);
+    }
+}
+
+// Mimetypes this plugin reports from XmlExtractor::mimetypes().
+const QStringList supportedMimeTypes = {
+    QStringLiteral("application/xml"),
+    QStringLiteral("image/svg+xml"),
+    QStringLiteral("image/svg"),
+};
+
+}
+
+namespace KFileMetaData
+{
+
+XmlExtractor::XmlExtractor(QObject* parent)
+    : ExtractorPlugin(parent)
+{
+}
+
+QStringList XmlExtractor::mimetypes() const
+{
+    return supportedMimeTypes;
+}
+
+/**
+ * Extracts type, metadata and (when requested) plain text from the input.
+ *
+ * SVG input is reported as Type::Image: Dublin Core metadata is read from
+ * the cc:Work element inside metadata/rdf:RDF, "defs" is skipped, and the
+ * remaining top-level elements are searched for text when ExtractPlainText
+ * is set. Any other input is reported as Type::Text and the text of the
+ * document element is appended when ExtractPlainText is set.
+ */
+void XmlExtractor::extract(ExtractionResult* result)
+{
+    const auto flags = result->inputFlags();
+    QFile file(result->inputUrl());
+    if (!file.open(QIODevice::ReadOnly)) {
+        qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
+        return;
+    }
+
+    QDomDocument doc;
+    const bool processNamespaces = true;
+    QString parseError;
+    int parseErrorLine = 0;
+    if (!doc.setContent(&file, processNamespaces, &parseError, &parseErrorLine)) {
+        // Not fatal: the type is still reported from the mimetype below.
+        qCWarning(KFILEMETADATA_LOG) << "Failed to parse XML document, line"
+                                     << parseErrorLine << ":" << parseError;
+    }
+
+    const QString mimetype = result->inputMimetype();
+    if ((mimetype == QLatin1String("image/svg")) ||
+            (mimetype == QLatin1String("image/svg+xml"))) {
+        result->addType(Type::Image);
+
+        QDomElement svg = doc.firstChildElement();
+
+        if (!svg.isNull()
+            && svg.localName() == QLatin1String("svg")
+            && svg.namespaceURI() == svgNS()) {
+
+            QDomElement e = svg.firstChildElement();
+            for (; !e.isNull(); e = e.nextSiblingElement()) {
+                if (e.namespaceURI() != svgNS()) {
+                    continue;
+                }
+
+                if (e.localName() == QLatin1String("metadata")) {
+                    auto rdf = e.firstChildElement(QLatin1String("RDF"));
+                    if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
+                        continue;
+                    }
+
+                    auto cc = rdf.firstChildElement(QLatin1String("Work"));
+                    if (cc.isNull() || cc.namespaceURI() != ccNS()) {
+                        continue;
+                    }
+
+                    DublinCoreExtractor::extract(result, cc);
+
+                } else if (e.localName() == QLatin1String("defs")) {
+                    // skip
+                    continue;
+                } else if (flags & ExtractionResult::ExtractPlainText) {
+                    // extract
+                    extractSvgText(result, e);
+                }
+            }
+        }
+    } else {
+        result->addType(Type::Text);
+
+        if (flags & ExtractionResult::ExtractPlainText) {
+            QDomElement n = doc.firstChildElement();
+            result->append(n.text());
+        }
+    }
+}
+
+} // namespace KFileMetaData
diff --git a/src/extractors/xmlextractor.h b/src/extractors/xmlextractor.h
new file mode 100644
index 0000000..8e44c1e
--- /dev/null
+++ 
b/src/extractors/xmlextractor.h
@@ -0,0 +1,51 @@
+/*
+    Copyright (C) 2018 Stefan Brüns
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#ifndef XML_EXTRACTOR_H
+#define XML_EXTRACTOR_H
+
+#include "extractorplugin.h"
+
+namespace KFileMetaData
+{
+
+/**
+ * Extractor plugin class: declares the list of handled mimetypes and the
+ * extraction entry point, both overriding ExtractorPlugin.
+ */
+class XmlExtractor : public ExtractorPlugin
+{
+    Q_OBJECT
+    Q_PLUGIN_METADATA(IID "org.kde.kf5.kfilemetadata.ExtractorPlugin")
+    Q_INTERFACES(KFileMetaData::ExtractorPlugin)
+
+public:
+    /** Creates the plugin with the given @p parent object. */
+    explicit XmlExtractor(QObject* parent = nullptr);
+
+    /** Returns the mimetypes this extractor handles. */
+    QStringList mimetypes() const override;
+
+    /** Runs the extraction for the input described by @p result. */
+    void extract(ExtractionResult* result) override;
+};
+
+} // namespace KFileMetaData
+
+#endif // XML_EXTRACTOR_H