// NOTE(review): in this copy of the patch all line breaks have been collapsed
// onto a single line and the targets of the #include directives in the
// include hunk are missing, so it is unlikely to apply as-is.
//
// Summary of the changes visible below:
//  * autotests/xmlextractortest.cpp
//      - the expected size of the extracted text changes from
//        `1 + 8 * count` to `8 * count`.
//  * src/extractors/xmlextractor.cpp
//      - one #include line is added (presumably for QXmlStreamReader, which
//        the new code uses -- confirm against the original patch).
//      - the QDomDocument construction and its setContent() call move from
//        before the mimetype check into the "image/svg" / "image/svg+xml"
//        branch.
//      - when ExtractPlainText is set, the code no longer appends
//        doc.firstChildElement().text(); it runs a QXmlStreamReader over the
//        file and appends the trimmed text of each Characters token, skipping
//        tokens that are empty after trimming.
diff --git a/autotests/xmlextractortest.cpp b/autotests/xmlextractortest.cpp --- a/autotests/xmlextractortest.cpp +++ b/autotests/xmlextractortest.cpp @@ -67,7 +67,7 @@ QString content = QStringLiteral("foo bar\n"); content.replace(QLatin1Char('\n'), QLatin1Char(' ')); QCOMPARE(result.text().leftRef(8), content.leftRef(8)); - QCOMPARE(result.text().size(), 1 + 8 * count); + QCOMPARE(result.text().size(), 8 * count); QBENCHMARK { plugin.extract(&result); diff --git a/src/extractors/xmlextractor.cpp b/src/extractors/xmlextractor.cpp --- a/src/extractors/xmlextractor.cpp +++ b/src/extractors/xmlextractor.cpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace { @@ -80,14 +81,13 @@ return; } - QDomDocument doc; - const bool processNamespaces = true; - doc.setContent(&file, processNamespaces); - if ((result->inputMimetype() == QLatin1String("image/svg")) || (result->inputMimetype() == QLatin1String("image/svg+xml"))) { result->addType(Type::Image); + QDomDocument doc; + const bool processNamespaces = true; + doc.setContent(&file, processNamespaces); QDomElement svg = doc.firstChildElement(); if (!svg.isNull() @@ -126,8 +126,17 @@ result->addType(Type::Text); if (flags & ExtractionResult::ExtractPlainText) { - QDomElement n = doc.firstChildElement(); - result->append(n.text()); + QXmlStreamReader stream(&file); + while (!stream.atEnd()) { + QXmlStreamReader::TokenType token = stream.readNext(); + + if (token == QXmlStreamReader::Characters) { + QString text = stream.text().trimmed().toString(); + if (!text.isEmpty()) { + result->append(text); + } + } + } } } }