diff --git a/src/file/extractor/app.cpp b/src/file/extractor/app.cpp --- a/src/file/extractor/app.cpp +++ b/src/file/extractor/app.cpp @@ -164,17 +164,6 @@ return; } - // HACK: Also, we're ignoring ttext files which are greater tha 10 Mb as we - // have trouble processing them - // - if (mimetype.startsWith(QLatin1String("text/"))) { - QFileInfo fileInfo(url); - if (fileInfo.size() >= 10 * 1024 * 1024) { - tr->removePhaseOne(id); - return; - } - } - // We always run the basic indexing again. This is mostly so that the proper // mimetype is set and we get proper type information. // The mimetype fetched in the BasicIndexingJob is fast but not accurate @@ -188,7 +177,14 @@ const QList exList = m_extractorCollection.fetchExtractors(mimetype); + quint64 filesize = QFileInfo(url).size(); for (KFileMetaData::Extractor* ex : exList) { + // HACK: We're ignoring text files which are greater than 10 MiB as we + // have trouble processing them + if ((filesize >= 10*1024*1024) && (ex->extractorProperties()["Id"].toString() == QLatin1String("org.kde.plaintextextractor"))) { + qCWarning(BALOO) << "Skipping" << url << ": large plain text file"; + continue; + } ex->extract(&result); }