diff --git a/src/file/extractor/app.cpp b/src/file/extractor/app.cpp
--- a/src/file/extractor/app.cpp
+++ b/src/file/extractor/app.cpp
@@ -127,11 +127,21 @@
 void App::index(Transaction* tr, const QString& url, quint64 id)
 {
     QString mimetype = m_mimeDb.mimeTypeForFile(url).name();
+    qCDebug(BALOO) << "Indexing" << id << url << mimetype;
 
-    bool shouldIndex = m_config.shouldBeIndexed(url) && m_config.shouldMimeTypeBeIndexed(mimetype);
-    if (!shouldIndex) {
+    if (!m_config.shouldBeIndexed(url)) {
         // FIXME: This should never be happening!
+        qCWarning(BALOO) << "Found" << url << "in the ContentIndexingDB, although it should be skipped";
         tr->removeDocument(id);
+        // The document was just removed: stop here, as the code did before the check was split
+        return;
+    }
+
+    // The initial BasicIndexingJob run has been supplied with the file extension
+    // mimetype only, skip based on the "real" mimetype
+    if (!m_config.shouldMimeTypeBeIndexed(mimetype)) {
+        qCDebug(BALOO) << "Skipping" << url << "- mimetype:" << mimetype;
+        tr->removePhaseOne(id);
         return;
     }
 
@@ -148,7 +158,7 @@
 
     // We always run the basic indexing again. This is mostly so that the proper
     // mimetype is set and we get proper type information.
-    // The mimetype fetched in the BasicIQ is fast but not accurate
+    // The mimetype fetched in the BasicIndexingJob is fast but not accurate
     BasicIndexingJob basicIndexer(url, mimetype, BasicIndexingJob::NoLevel);
     basicIndexer.index();