diff --git a/autotests/CMakeLists.txt b/autotests/CMakeLists.txt
--- a/autotests/CMakeLists.txt
+++ b/autotests/CMakeLists.txt
@@ -137,6 +137,21 @@
LINK_LIBRARIES Qt5::Test KF5::FileMetaData
)
+#
+# XML extractor test (built directly from the extractor sources listed below)
+#
+set(xmlextractor_SRCS
+    ../src/extractors/xmlextractor.cpp
+    ../src/extractors/dublincoreextractor.cpp
+    ../src/kfilemetadata_debug.cpp
+    xmlextractortest.cpp
+)
+
+ecm_add_test(${xmlextractor_SRCS}
+    LINK_LIBRARIES Qt5::Test Qt5::Xml KF5::FileMetaData
+    TEST_NAME "xmlextractortest"
+)
+
################
# Writer tests #
################
diff --git a/autotests/samplefiles/test_with_metadata.svg b/autotests/samplefiles/test_with_metadata.svg
new file mode 100644
--- /dev/null
+++ b/autotests/samplefiles/test_with_metadata.svg
@@ -0,0 +1,22 @@
+
+
+
diff --git a/autotests/xmlextractortest.h b/autotests/xmlextractortest.h
new file mode 100644
--- /dev/null
+++ b/autotests/xmlextractortest.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (C) 2018 Stefan Brüns
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) version 3, or any
+ later version accepted by the membership of KDE e.V. (or its
+ successor approved by the membership of KDE e.V.), which shall
+ act as a proxy defined in Section 6 of version 3 of the license.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef XMLEXTRACTORTESTS_H
+#define XMLEXTRACTORTESTS_H
+
+#include <QObject>
+#include <QString>
+
+/**
+ * Qt Test fixture for the XML extractor: one benchmark plus two checks
+ * that extract the test_with_metadata.svg sample file.
+ */
+class XmlExtractorTests : public QObject
+{
+    Q_OBJECT
+
+public:
+    explicit XmlExtractorTests(QObject* parent = nullptr);
+
+private Q_SLOTS:
+    // Each private slot is a test function; the declaration order is kept
+    // as-is because Qt Test runs them in that order.
+    void benchMarkXmlExtractor();
+    void testXmlExtractor();
+    void testXmlExtractorNoContent();
+
+private:
+    /// Returns the path of @p fileName inside the sample-files directory.
+    QString testFilePath(const QString& fileName) const;
+};
+
+#endif // XMLEXTRACTORTESTS_H
diff --git a/autotests/xmlextractortest.cpp b/autotests/xmlextractortest.cpp
new file mode 100644
--- /dev/null
+++ b/autotests/xmlextractortest.cpp
@@ -0,0 +1,117 @@
+/*
+ Copyright (C) 2018 Stefan Brüns
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) version 3, or any
+ later version accepted by the membership of KDE e.V. (or its
+ successor approved by the membership of KDE e.V.), which shall
+ act as a proxy defined in Section 6 of version 3 of the license.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "xmlextractortest.h"
+
+#include <QtTest>
+
+#include "simpleextractionresult.h"
+#include "indexerextractortestsconfig.h"
+#include "extractors/xmlextractor.h"
+
+using namespace KFileMetaData;
+
+/// Forwards @p parent to QObject; the fixture has no members to initialise.
+XmlExtractorTests::XmlExtractorTests(QObject* parent)
+    : QObject(parent)
+{
+}
+
+/// Builds "<sample dir>/<fileName>" from INDEXER_TESTS_SAMPLE_FILES_PATH.
+QString XmlExtractorTests::testFilePath(const QString& fileName) const
+{
+    const QLatin1String sampleDir(INDEXER_TESTS_SAMPLE_FILES_PATH);
+    return sampleDir + QLatin1Char('/') + fileName;
+}
+
+/**
+ * Writes a large temporary .xml file, extracts it once to sanity-check the
+ * resulting text, then measures repeated extract() calls with QBENCHMARK.
+ */
+void XmlExtractorTests::benchMarkXmlExtractor()
+{
+    XmlExtractor plugin(this);
+
+    // Input file with a varying number of "foo bar " chunks per line.
+    QTemporaryFile file(QStringLiteral("XXXXXX.xml"));
+    QVERIFY(file.open());
+
+    // NOTE(review): the first and last write carry only a newline here; if they
+    // were meant to open/close a root element, that markup is missing - confirm.
+    const QByteArray chunk("foo bar ");
+    int count = 0;
+    file.write("\n");
+    for (int line = 0; line < 10000; ++line) {
+        // Chunks per line cycle 0, 1, 2, ... 99, 0, 1, ... (line modulo 100).
+        const int chunksOnLine = line % 100;
+        for (int i = 0; i < chunksOnLine; ++i) {
+            file.write(chunk);
+        }
+        count += chunksOnLine;
+        file.write("\n");
+    }
+    file.write("\n");
+    file.close();
+
+    SimpleExtractionResult result(file.fileName(), QStringLiteral("application/xml"));
+    plugin.extract(&result);
+
+    // Only the first 8 characters ("foo bar " once the newline is replaced by
+    // a space) and the total length are checked.
+    QString expectedStart = QStringLiteral("foo bar\n");
+    expectedStart.replace(QLatin1Char('\n'), QLatin1Char(' '));
+    QCOMPARE(result.text().leftRef(8), expectedStart.leftRef(8));
+    QCOMPARE(result.text().size(), 1 + 8 * count);
+
+    // The benchmark reuses the same result object on every iteration.
+    QBENCHMARK {
+        plugin.extract(&result);
+    }
+}
+
+/**
+ * Full extraction (ExtractEverything) of test_with_metadata.svg: expects the
+ * Image type, a single Title property and the text "Some text ".
+ */
+void XmlExtractorTests::testXmlExtractor()
+{
+    XmlExtractor plugin{this};
+
+    const QString svgPath = testFilePath(QStringLiteral("test_with_metadata.svg"));
+    SimpleExtractionResult result(svgPath,
+                                  QStringLiteral("image/svg"),
+                                  ExtractionResult::ExtractEverything);
+    plugin.extract(&result);
+
+    QCOMPARE(result.types().size(), 1);
+    QCOMPARE(result.types().at(0), Type::Image);
+
+    QCOMPARE(result.properties().size(), 1);
+    QCOMPARE(result.properties().value(Property::Title), QStringLiteral("Document Title"));
+
+    // The expected text is the literal with its newline replaced by a space.
+    QString expectedText = QStringLiteral("Some text\n");
+    expectedText.replace(QLatin1Char('\n'), QLatin1Char(' '));
+    QCOMPARE(result.text(), expectedText);
+}
+
+/**
+ * Metadata-only extraction (ExtractMetaData) of test_with_metadata.svg: the
+ * Image type and Title property are still expected, but the text must be empty.
+ */
+void XmlExtractorTests::testXmlExtractorNoContent()
+{
+    XmlExtractor plugin{this};
+
+    const QString svgPath = testFilePath(QStringLiteral("test_with_metadata.svg"));
+    SimpleExtractionResult result(svgPath,
+                                  QStringLiteral("image/svg"),
+                                  ExtractionResult::ExtractMetaData);
+    plugin.extract(&result);
+
+    QCOMPARE(result.types().size(), 1);
+    QCOMPARE(result.types().at(0), Type::Image);
+
+    QCOMPARE(result.properties().size(), 1);
+    QCOMPARE(result.properties().value(Property::Title), QStringLiteral("Document Title"));
+
+    QVERIFY(result.text().isEmpty());
+}
+
+// Qt Test entry point: generates main() for this fixture (GUI-less variant).
+QTEST_GUILESS_MAIN(XmlExtractorTests)
+