diff --git a/src/extractors/epubextractor.cpp b/src/extractors/epubextractor.cpp --- a/src/extractors/epubextractor.cpp +++ b/src/extractors/epubextractor.cpp @@ -78,34 +78,36 @@ result->addType(Type::Document); - QString value = fetchMetadata(ePubDoc, EPUB_TITLE); - if (!value.isEmpty()) { - result->add(Property::Title, value); - } + if (result->inputFlags() & ExtractionResult::ExtractMetaData) { - value = fetchMetadata(ePubDoc, EPUB_SUBJECT); - if (!value.isEmpty()) { - result->add(Property::Subject, value); - } + QString value = fetchMetadata(ePubDoc, EPUB_TITLE); + if (!value.isEmpty()) { + result->add(Property::Title, value); + } - value = fetchMetadata(ePubDoc, EPUB_CREATOR); - if (!value.isEmpty()) { - if (value.startsWith(QLatin1String("aut:"), Qt::CaseInsensitive)) { - value = value.mid(4).simplified(); - } else if (value.startsWith(QLatin1String("author:"), Qt::CaseInsensitive)) { - value = value.mid(7).simplified(); + value = fetchMetadata(ePubDoc, EPUB_SUBJECT); + if (!value.isEmpty()) { + result->add(Property::Subject, value); } - // A lot of authors have their name written in () again. We discard that part - int index = value.indexOf(QLatin1Char('(')); - if (index) - value = value.mid(0, index); + value = fetchMetadata(ePubDoc, EPUB_CREATOR); + if (!value.isEmpty()) { + if (value.startsWith(QLatin1String("aut:"), Qt::CaseInsensitive)) { + value = value.mid(4).simplified(); + } else if (value.startsWith(QLatin1String("author:"), Qt::CaseInsensitive)) { + value = value.mid(7).simplified(); + } + + // A lot of authors have their name written in () again. We discard that part + int index = value.indexOf(QLatin1Char('(')); + if (index) + value = value.mid(0, index); - result->add(Property::Author, value); - } + result->add(Property::Author, value); + } - // The Contributor just seems to be mostly Calibre aka the Generator - /* + // The Contributor just seems to be mostly Calibre aka the Generator + /* value = fetchMetadata(ePubDoc, EPUB_CONTRIB); if( !value.isEmpty() ) { SimpleResource con; @@ -116,29 +118,30 @@ graph << con; }*/ - value = fetchMetadata(ePubDoc, EPUB_PUBLISHER); - if (!value.isEmpty()) { - result->add(Property::Publisher, value); - } - - value = fetchMetadata(ePubDoc, EPUB_DESCRIPTION); - if (!value.isEmpty()) { - result->add(Property::Description, value); - } - - value = fetchMetadata(ePubDoc, EPUB_DATE); - if (!value.isEmpty()) { - if (value.startsWith(QLatin1String("Unspecified:"), Qt::CaseInsensitive)) { - value = value.mid(QByteArray("Unspecified:").size()).simplified(); + value = fetchMetadata(ePubDoc, EPUB_PUBLISHER); + if (!value.isEmpty()) { + result->add(Property::Publisher, value); } - int ind = value.indexOf(QLatin1String("publication:"), Qt::CaseInsensitive); - if (ind != -1) { - value = value.mid(ind + QByteArray("publication:").size()).simplified(); + + value = fetchMetadata(ePubDoc, EPUB_DESCRIPTION); + if (!value.isEmpty()) { + result->add(Property::Description, value); } - QDateTime dt = ExtractorPlugin::dateTimeFromString(value); - if (!dt.isNull()) { - result->add(Property::CreationDate, dt); - result->add(Property::ReleaseYear, dt.date().year()); + + value = fetchMetadata(ePubDoc, EPUB_DATE); + if (!value.isEmpty()) { + if (value.startsWith(QLatin1String("Unspecified:"), Qt::CaseInsensitive)) { + value = value.mid(QByteArray("Unspecified:").size()).simplified(); + } + int ind = value.indexOf(QLatin1String("publication:"), Qt::CaseInsensitive); + if (ind != -1) { + value = value.mid(ind + QByteArray("publication:").size()).simplified(); + } + QDateTime dt = ExtractorPlugin::dateTimeFromString(value); + if (!dt.isNull()) { + result->add(Property::CreationDate, dt); + result->add(Property::ReleaseYear, dt.date().year()); + } } } diff --git a/src/extractors/exiv2extractor.cpp b/src/extractors/exiv2extractor.cpp --- a/src/extractors/exiv2extractor.cpp +++ b/src/extractors/exiv2extractor.cpp @@ -169,6 +169,10 @@ } result->addType(Type::Image); + if (!(result->inputFlags() & ExtractionResult::ExtractMetaData)) { + return; + } + if (image->pixelHeight()) { result->add(Property::Height, image->pixelHeight()); } diff --git a/src/extractors/ffmpegextractor.cpp b/src/extractors/ffmpegextractor.cpp --- a/src/extractors/ffmpegextractor.cpp +++ b/src/extractors/ffmpegextractor.cpp @@ -89,93 +89,95 @@ result->addType(Type::Video); - int totalSecs = fmt_ctx->duration / AV_TIME_BASE; - int bitrate = fmt_ctx->bit_rate; + if (result->inputFlags() & ExtractionResult::ExtractMetaData) { + int totalSecs = fmt_ctx->duration / AV_TIME_BASE; + int bitrate = fmt_ctx->bit_rate; - result->add(Property::Duration, totalSecs); - result->add(Property::BitRate, bitrate); + result->add(Property::Duration, totalSecs); + result->add(Property::BitRate, bitrate); - for (uint i = 0; i < fmt_ctx->nb_streams; i++) { - AVStream* stream = fmt_ctx->streams[i]; + for (uint i = 0; i < fmt_ctx->nb_streams; i++) { + AVStream* stream = fmt_ctx->streams[i]; #if defined HAVE_AVSTREAM_CODECPAR && HAVE_AVSTREAM_CODECPAR - const AVCodecParameters* codec = stream->codecpar; + const AVCodecParameters* codec = stream->codecpar; #else - const AVCodecContext* codec = stream->codec; + const AVCodecContext* codec = stream->codec; #endif - if (codec->codec_type == AVMEDIA_TYPE_VIDEO) { - result->add(Property::Width, codec->width); - result->add(Property::Height, codec->height); - - AVRational avSampleAspectRatio = av_guess_sample_aspect_ratio(fmt_ctx, stream, nullptr); - AVRational avDisplayAspectRatio; - av_reduce(&avDisplayAspectRatio.num, &avDisplayAspectRatio.den, - codec->width * avSampleAspectRatio.num, - codec->height * avSampleAspectRatio.den, - 1024*1024); - double displayAspectRatio = avDisplayAspectRatio.num; - if (avDisplayAspectRatio.den) - displayAspectRatio /= avDisplayAspectRatio.den; - if (displayAspectRatio) - result->add(Property::AspectRatio, displayAspectRatio); - - AVRational avFrameRate = av_guess_frame_rate(fmt_ctx, stream, nullptr); - double frameRate = avFrameRate.num; - if (avFrameRate.den) - frameRate /= avFrameRate.den; - if (frameRate) - result->add(Property::FrameRate, frameRate); + if (codec->codec_type == AVMEDIA_TYPE_VIDEO) { + result->add(Property::Width, codec->width); + result->add(Property::Height, codec->height); + + AVRational avSampleAspectRatio = av_guess_sample_aspect_ratio(fmt_ctx, stream, nullptr); + AVRational avDisplayAspectRatio; + av_reduce(&avDisplayAspectRatio.num, &avDisplayAspectRatio.den, + codec->width * avSampleAspectRatio.num, + codec->height * avSampleAspectRatio.den, + 1024*1024); + double displayAspectRatio = avDisplayAspectRatio.num; + if (avDisplayAspectRatio.den) + displayAspectRatio /= avDisplayAspectRatio.den; + if (displayAspectRatio) + result->add(Property::AspectRatio, displayAspectRatio); + + AVRational avFrameRate = av_guess_frame_rate(fmt_ctx, stream, nullptr); + double frameRate = avFrameRate.num; + if (avFrameRate.den) + frameRate /= avFrameRate.den; + if (frameRate) + result->add(Property::FrameRate, frameRate); + } } - } - AVDictionary* dict = fmt_ctx->metadata; - AVDictionaryEntry* entry; + AVDictionary* dict = fmt_ctx->metadata; + AVDictionaryEntry* entry; - entry = av_dict_get(dict, "title", nullptr, 0); - if (entry) { - result->add(Property::Title, QString::fromUtf8(entry->value)); - } + entry = av_dict_get(dict, "title", nullptr, 0); + if (entry) { + result->add(Property::Title, QString::fromUtf8(entry->value)); + } - entry = av_dict_get(dict, "author", nullptr, 0); - if (entry) { - result->add(Property::Author, QString::fromUtf8(entry->value)); - } + entry = av_dict_get(dict, "author", nullptr, 0); + if (entry) { + result->add(Property::Author, QString::fromUtf8(entry->value)); + } - entry = av_dict_get(dict, "copyright", nullptr, 0); - if (entry) { - result->add(Property::Copyright, QString::fromUtf8(entry->value)); - } + entry = av_dict_get(dict, "copyright", nullptr, 0); + if (entry) { + result->add(Property::Copyright, QString::fromUtf8(entry->value)); + } - entry = av_dict_get(dict, "comment", nullptr, 0); - if (entry) { - result->add(Property::Comment, QString::fromUtf8(entry->value)); - } + entry = av_dict_get(dict, "comment", nullptr, 0); + if (entry) { + result->add(Property::Comment, QString::fromUtf8(entry->value)); + } - entry = av_dict_get(dict, "album", nullptr, 0); - if (entry) { - result->add(Property::Album, QString::fromUtf8(entry->value)); - } + entry = av_dict_get(dict, "album", nullptr, 0); + if (entry) { + result->add(Property::Album, QString::fromUtf8(entry->value)); + } - entry = av_dict_get(dict, "genre", nullptr, 0); - if (entry) { - result->add(Property::Genre, QString::fromUtf8(entry->value)); - } + entry = av_dict_get(dict, "genre", nullptr, 0); + if (entry) { + result->add(Property::Genre, QString::fromUtf8(entry->value)); + } - entry = av_dict_get(dict, "track", nullptr, 0); - if (entry) { - QString value = QString::fromUtf8(entry->value); + entry = av_dict_get(dict, "track", nullptr, 0); + if (entry) { + QString value = QString::fromUtf8(entry->value); - bool ok = false; - int track = value.toInt(&ok); - if (ok && track) - result->add(Property::TrackNumber, track); - } + bool ok = false; + int track = value.toInt(&ok); + if (ok && track) + result->add(Property::TrackNumber, track); + } - entry = av_dict_get(dict, "year", nullptr, 0); - if (entry) { - int year = QString::fromUtf8(entry->value).toInt(); - result->add(Property::ReleaseYear, year); + entry = av_dict_get(dict, "year", nullptr, 0); + if (entry) { + int year = QString::fromUtf8(entry->value).toInt(); + result->add(Property::ReleaseYear, year); + } } avformat_close_input(&fmt_ctx); diff --git a/src/extractors/odfextractor.cpp b/src/extractors/odfextractor.cpp --- a/src/extractors/odfextractor.cpp +++ b/src/extractors/odfextractor.cpp @@ -87,61 +87,63 @@ return; } - QDomDocument metaData(QStringLiteral("metaData")); - metaData.setContent(static_cast(metaXml)->data(), true); - - // parse metadata ... - QDomElement meta = firstChildElementNS(firstChildElementNS(metaData, - officeNS(), QStringLiteral("document-meta")), - officeNS(), QStringLiteral("meta")); - - QDomNode n = meta.firstChild(); - while (!n.isNull()) { - QDomElement e = n.toElement(); - if (!e.isNull()) { - const QString namespaceURI = e.namespaceURI(); - const QString localName = e.localName(); - - // Dublin Core - if (namespaceURI == dcNS()) { - if (localName == QLatin1String("description")) { - result->add(Property::Description, e.text()); - } else if (localName == QLatin1String("subject")) { - result->add(Property::Subject, e.text()); - } else if (localName == QLatin1String("title")) { - result->add(Property::Title, e.text()); - } else if (localName == QLatin1String("creator")) { - result->add(Property::Author, e.text()); - } else if (localName == QLatin1String("language")) { - result->add(Property::Language, e.text()); - } - } - // Meta Properties - else if (namespaceURI == metaNS()) { - if (localName == QLatin1String("document-statistic")) { - bool ok = false; - int pageCount = e.attributeNS(metaNS(), QStringLiteral("page-count")).toInt(&ok); - if (ok) { - result->add(Property::PageCount, pageCount); + if (result->inputFlags() & ExtractionResult::ExtractMetaData) { + QDomDocument metaData(QStringLiteral("metaData")); + metaData.setContent(static_cast(metaXml)->data(), true); + + // parse metadata ... + QDomElement meta = firstChildElementNS(firstChildElementNS(metaData, + officeNS(), QStringLiteral("document-meta")), + officeNS(), QStringLiteral("meta")); + + QDomNode n = meta.firstChild(); + while (!n.isNull()) { + QDomElement e = n.toElement(); + if (!e.isNull()) { + const QString namespaceURI = e.namespaceURI(); + const QString localName = e.localName(); + + // Dublin Core + if (namespaceURI == dcNS()) { + if (localName == QLatin1String("description")) { + result->add(Property::Description, e.text()); + } else if (localName == QLatin1String("subject")) { + result->add(Property::Subject, e.text()); + } else if (localName == QLatin1String("title")) { + result->add(Property::Title, e.text()); + } else if (localName == QLatin1String("creator")) { + result->add(Property::Author, e.text()); + } else if (localName == QLatin1String("language")) { + result->add(Property::Language, e.text()); } - - int wordCount = e.attributeNS(metaNS(), QStringLiteral("word-count")).toInt(&ok); - if (ok) { - result->add(Property::WordCount, wordCount); + } + // Meta Properties + else if (namespaceURI == metaNS()) { + if (localName == QLatin1String("document-statistic")) { + bool ok = false; + int pageCount = e.attributeNS(metaNS(), QStringLiteral("page-count")).toInt(&ok); + if (ok) { + result->add(Property::PageCount, pageCount); + } + + int wordCount = e.attributeNS(metaNS(), QStringLiteral("word-count")).toInt(&ok); + if (ok) { + result->add(Property::WordCount, wordCount); + } + } else if (localName == QLatin1String("keyword")) { + QString keywords = e.text(); + result->add(Property::Keywords, keywords); + } else if (localName == QLatin1String("generator")) { + result->add(Property::Generator, e.text()); + } else if (localName == QLatin1String("creation-date")) { + QDateTime dt = ExtractorPlugin::dateTimeFromString(e.text()); + if (!dt.isNull()) + result->add(Property::CreationDate, dt); } - } else if (localName == QLatin1String("keyword")) { - QString keywords = e.text(); - result->add(Property::Keywords, keywords); - } else if (localName == QLatin1String("generator")) { - result->add(Property::Generator, e.text()); - } else if (localName == QLatin1String("creation-date")) { - QDateTime dt = ExtractorPlugin::dateTimeFromString(e.text()); - if (!dt.isNull()) - result->add(Property::CreationDate, dt); } } + n = n.nextSibling(); } - n = n.nextSibling(); } result->addType(Type::Document); diff --git a/src/extractors/office2007extractor.cpp b/src/extractors/office2007extractor.cpp --- a/src/extractors/office2007extractor.cpp +++ b/src/extractors/office2007extractor.cpp @@ -74,7 +74,9 @@ const KArchiveDirectory* docPropDirectory = dynamic_cast(docPropEntry); const QStringList docPropsEntries = docPropDirectory->entries(); - if (docPropsEntries.contains(QStringLiteral("core.xml"))) { + const bool extractMetaData = result->inputFlags() & ExtractionResult::ExtractMetaData; + + if (extractMetaData && docPropsEntries.contains(QStringLiteral("core.xml"))) { QDomDocument coreDoc(QStringLiteral("core")); const KArchiveFile* file = static_cast(docPropDirectory->entry(QStringLiteral("core.xml"))); coreDoc.setContent(file->data()); @@ -139,7 +141,7 @@ } } - if (docPropsEntries.contains(QStringLiteral("app.xml"))) { + if (extractMetaData && docPropsEntries.contains(QStringLiteral("app.xml"))) { QDomDocument appDoc(QStringLiteral("app")); const KArchiveFile* file = static_cast(docPropDirectory->entry(QStringLiteral("app.xml"))); appDoc.setContent(file->data()); diff --git a/src/extractors/plaintextextractor.cpp b/src/extractors/plaintextextractor.cpp --- a/src/extractors/plaintextextractor.cpp +++ b/src/extractors/plaintextextractor.cpp @@ -98,8 +98,9 @@ lines += 1; } - - result->add(Property::LineCount, lines); + if (result->inputFlags() & ExtractionResult::ExtractMetaData) { + result->add(Property::LineCount, lines); + } free(line); close(fd); diff --git a/src/extractors/popplerextractor.cpp b/src/extractors/popplerextractor.cpp --- a/src/extractors/popplerextractor.cpp +++ b/src/extractors/popplerextractor.cpp @@ -52,31 +52,33 @@ result->addType(Type::Document); - QString title = pdfDoc->info(QStringLiteral("Title")).trimmed(); + if (result->inputFlags() & ExtractionResult::ExtractMetaData) { + QString title = pdfDoc->info(QStringLiteral("Title")).trimmed(); - if (!title.isEmpty()) { - result->add(Property::Title, title); - } + if (!title.isEmpty()) { + result->add(Property::Title, title); + } - QString subject = pdfDoc->info(QStringLiteral("Subject")); - if (!subject.isEmpty()) { - result->add(Property::Subject, subject); - } + QString subject = pdfDoc->info(QStringLiteral("Subject")); + if (!subject.isEmpty()) { + result->add(Property::Subject, subject); + } - QString author = pdfDoc->info(QStringLiteral("Author")); - if (!author.isEmpty()) { - result->add(Property::Author, author); - } + QString author = pdfDoc->info(QStringLiteral("Author")); + if (!author.isEmpty()) { + result->add(Property::Author, author); + } - QString generator = pdfDoc->info(QStringLiteral("Producer")); - if (!generator.isEmpty()) { - result->add(Property::Generator, generator); - } + QString generator = pdfDoc->info(QStringLiteral("Producer")); + if (!generator.isEmpty()) { + result->add(Property::Generator, generator); + } - QString creationDate = pdfDoc->info(QStringLiteral("CreationDate")); - if (!creationDate.isEmpty()) { - QByteArray utf8 = creationDate.toUtf8(); - result->add(Property::CreationDate, Poppler::convertDate(utf8.data())); + QString creationDate = pdfDoc->info(QStringLiteral("CreationDate")); + if (!creationDate.isEmpty()) { + QByteArray utf8 = creationDate.toUtf8(); + result->add(Property::CreationDate, Poppler::convertDate(utf8.data())); + } } if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { diff --git a/src/extractors/postscriptdscextractor.cpp b/src/extractors/postscriptdscextractor.cpp --- a/src/extractors/postscriptdscextractor.cpp +++ b/src/extractors/postscriptdscextractor.cpp @@ -42,11 +42,6 @@ void DscExtractor::extract(ExtractionResult* result) { - auto flags = result->inputFlags(); - if (!(flags & ExtractionResult::ExtractMetaData)) { - return; - } - QFile file(result->inputUrl()); if (!file.open(QIODevice::ReadOnly)) { qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file"; @@ -60,6 +55,9 @@ result->addType(Type::Image); } + if (!(result->inputFlags() & ExtractionResult::ExtractMetaData)) { + return; + } // Try to find some DSC (PostScript Language Document Structuring Conventions) conforming data QTextStream stream(&file); QString line; diff --git a/src/extractors/taglibextractor.cpp b/src/extractors/taglibextractor.cpp --- a/src/extractors/taglibextractor.cpp +++ b/src/extractors/taglibextractor.cpp @@ -74,7 +74,7 @@ void extractAudioProperties(TagLib::File* file, ExtractionResult* result) { TagLib::AudioProperties* audioProp = file->audioProperties(); - if (audioProp) { + if (audioProp && (result->inputFlags() & ExtractionResult::ExtractMetaData)) { if (audioProp->length()) { // What about the xml duration? result->add(Property::Duration, audioProp->length()); @@ -96,9 +96,10 @@ void readGenericProperties(const TagLib::PropertyMap &savedProperties, ExtractionResult* result) { - if (savedProperties.isEmpty()) { + if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || savedProperties.isEmpty()) { return; } + if (savedProperties.contains("TITLE")) { result->add(Property::Title, TStringToQString(savedProperties["TITLE"].toString()).trimmed()); } @@ -254,9 +255,10 @@ void extractId3Tags(TagLib::ID3v2::Tag* Id3Tags, ExtractionResult* result) { - if (Id3Tags->isEmpty()) { + if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || Id3Tags->isEmpty()) { return; } + TagLib::ID3v2::FrameList lstID3v2; /* @@ -303,9 +305,10 @@ void extractMp4Tags(TagLib::MP4::Tag* mp4Tags, ExtractionResult* result) { - if (mp4Tags->isEmpty()) { + if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || mp4Tags->isEmpty()) { return; } + TagLib::MP4::ItemListMap allTags = mp4Tags->itemListMap(); /* @@ -321,7 +324,7 @@ void extractAsfTags(TagLib::ASF::Tag* asfTags, ExtractionResult* result) { - if (asfTags->isEmpty()) { + if (!(result->inputFlags() & ExtractionResult::ExtractMetaData) || asfTags->isEmpty()) { return; } diff --git a/src/extractors/xmlextractor.cpp b/src/extractors/xmlextractor.cpp --- a/src/extractors/xmlextractor.cpp +++ b/src/extractors/xmlextractor.cpp @@ -101,6 +101,10 @@ } if (e.localName() == QLatin1String("metadata")) { + if (!(flags & ExtractionResult::ExtractMetaData)) { + continue; + } + auto rdf = e.firstChildElement(QLatin1String("RDF")); if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) { continue;