diff --git a/src/file/extractor/app.cpp b/src/file/extractor/app.cpp
--- a/src/file/extractor/app.cpp
+++ b/src/file/extractor/app.cpp
@@ -127,11 +127,21 @@
 void App::index(Transaction* tr, const QString& url, quint64 id)
 {
     QString mimetype = m_mimeDb.mimeTypeForFile(url).name();
+    qCDebug(BALOO) << "Indexing" << id << url << mimetype;
 
-    bool shouldIndex = m_config.shouldBeIndexed(url) && m_config.shouldMimeTypeBeIndexed(mimetype);
-    if (!shouldIndex) {
+    if (!m_config.shouldBeIndexed(url)) {
         // FIXME: This should never be happening!
+        qCWarning(BALOO) << "Found" << url << "in the ContentIndexingDB, although it should be skipped";
         tr->removeDocument(id);
+        // The document has just been removed: do not fall through to the
+        // mimetype check or the indexing below.
+        return;
+    }
+
+    // The initial BasicIndexingJob run has been supplied with the file extension
+    // mimetype only, skip based on the "real" mimetype
+    if (!m_config.shouldMimeTypeBeIndexed(mimetype)) {
+        qCDebug(BALOO) << "Skipping" << url << "- mimetype:" << mimetype;
+        tr->removePhaseOne(id);
         return;
     }
 
@@ -148,7 +158,7 @@
 
     // We always run the basic indexing again. This is mostly so that the proper
     // mimetype is set and we get proper type information.
-    // The mimetype fetched in the BasicIQ is fast but not accurate
+    // The mimetype fetched in the BasicIndexingJob is fast but not accurate
     BasicIndexingJob basicIndexer(url, mimetype, BasicIndexingJob::NoLevel);
     basicIndexer.index();
 