diff --git a/src/file/extractor/app.cpp b/src/file/extractor/app.cpp
--- a/src/file/extractor/app.cpp
+++ b/src/file/extractor/app.cpp
@@ -146,9 +146,6 @@
 
 void App::index(Transaction* tr, const QString& url, quint64 id)
 {
-    QString mimetype = KFileMetaData::MimeUtils::strictMimeType(url, m_mimeDb).name();
-    qCDebug(BALOO) << "Indexing" << id << url << mimetype;
-
     if (!m_config.shouldBeIndexed(url)) {
         // This apparently happens when the config has changed after the document
         // was added to the content indexing db
@@ -159,6 +156,7 @@
 
     // The initial BasicIndexingJob run has been supplied with the file extension
     // mimetype only, skip based on the "real" mimetype
+    QString mimetype = KFileMetaData::MimeUtils::strictMimeType(url, m_mimeDb).name();
     if (!m_config.shouldMimeTypeBeIndexed(mimetype)) {
         qCDebug(BALOO) << "Skipping" << url << "- mimetype:" << mimetype;
         tr->removePhaseOne(id);
@@ -171,10 +169,12 @@
     if (mimetype.startsWith(QLatin1String("text/"))) {
         QFileInfo fileInfo(url);
         if (fileInfo.size() >= 10 * 1024 * 1024) {
+            qCDebug(BALOO) << "Skipping large " << url << "- mimetype:" << mimetype;
             tr->removePhaseOne(id);
             return;
         }
     }
+    qCDebug(BALOO) << "Indexing" << id << url << mimetype;
 
     // We always run the basic indexing again. This is mostly so that the proper
     // mimetype is set and we get proper type information.