diff --git a/src/engine/documenttimedb.h b/src/engine/documenttimedb.h index 83c34176..2cdc080e 100644 --- a/src/engine/documenttimedb.h +++ b/src/engine/documenttimedb.h @@ -1,66 +1,74 @@ /* This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef BALOO_DOCUMENTTIMEDB_H #define BALOO_DOCUMENTTIMEDB_H #include "engine_export.h" #include #include #include #include namespace Baloo { class BALOO_ENGINE_EXPORT DocumentTimeDB { public: DocumentTimeDB(MDB_dbi dbi, MDB_txn* txn); ~DocumentTimeDB(); static MDB_dbi create(MDB_txn* txn); static MDB_dbi open(MDB_txn* txn); - struct TimeInfo { - quint32 mTime; - quint32 cTime; + struct TimeInfo + { + /** Tracking of file time stamps + * + * @sa QDateTime::toTime_t() + * @sa QFileInfo::lastModified() + * @sa QFileInfo::metadataChangeTime() + */ + quint32 mTime; /**< file (data) modification time */ + quint32 cTime; /**< metadata (e.g. 
XAttr) change time */ + /* No birthtime yet */ explicit TimeInfo(quint32 mt = 0, quint32 ct = 0) : mTime(mt), cTime(ct) {} bool operator == (const TimeInfo& rhs) const { return mTime == rhs.mTime && cTime == rhs.cTime; } }; void put(quint64 docId, const TimeInfo& info); TimeInfo get(quint64 docId); void del(quint64 docId); bool contains(quint64 docId); QMap toTestMap() const; private: MDB_txn* m_txn; MDB_dbi m_dbi; }; } #endif // BALOO_DOCUMENTTIMEDB_H diff --git a/src/file/basicindexingjob.cpp b/src/file/basicindexingjob.cpp index 41f8e7c5..f7258c5b 100644 --- a/src/file/basicindexingjob.cpp +++ b/src/file/basicindexingjob.cpp @@ -1,219 +1,219 @@ /* * This file is part of the KDE Baloo Project * Copyright (C) 2013-2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) version 3, or any * later version accepted by the membership of KDE e.V. (or its * successor approved by the membership of KDE e.V.), which shall * act as a proxy defined in Section 6 of version 3 of the license. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* */ #include "basicindexingjob.h" #include "termgenerator.h" #include "idutils.h" #include #include #include #include #include using namespace Baloo; BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype, IndexingLevel level) : m_filePath(filePath) , m_mimetype(mimetype) , m_indexingLevel(level) { } BasicIndexingJob::~BasicIndexingJob() { } bool BasicIndexingJob::index() { const QByteArray url = QFile::encodeName(m_filePath); QT_STATBUF statBuf; if (filePathToStat(url, statBuf) != 0) { return false; } Document doc; doc.setId(statBufToId(statBuf)); doc.setUrl(url); QString fileName = url.mid(url.lastIndexOf('/') + 1); TermGenerator tg(&doc); tg.indexFileNameText(fileName, 1000); tg.indexFileNameText(fileName, QByteArray("F")); tg.indexText(m_mimetype, QByteArray("M")); - // Time + // (Content) Modification time, Metadata (e.g. XAttr) change time doc.setMTime(statBuf.st_mtime); doc.setCTime(statBuf.st_ctime); // Types QVector tList = typesForMimeType(m_mimetype); for (KFileMetaData::Type::Type type : tList) { QByteArray num = QByteArray::number(static_cast(type)); doc.addBoolTerm(QByteArray("T") + num); } if (S_ISDIR(statBuf.st_mode)) { static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast(KFileMetaData::Type::Folder)); doc.addBoolTerm(type); // For folders we do not need to go through file indexing, so we do not set contentIndexing } else if (m_indexingLevel == MarkForContentIndexing) { doc.setContentIndexing(true); } indexXAttr(m_filePath, doc); m_doc = doc; return true; } bool BasicIndexingJob::indexXAttr(const QString& url, Document& doc) { KFileMetaData::UserMetaData userMetaData(url); TermGenerator tg(&doc); QStringList tags = userMetaData.tags(); for (const QString& tag : tags) { tg.indexXattrText(tag, QByteArray("TA")); doc.addXattrBoolTerm(QByteArray("TAG-") + tag.toUtf8()); } int rating = userMetaData.rating(); if (rating) { doc.addXattrBoolTerm(QByteArray("R") + QByteArray::number(rating)); } QString 
comment = userMetaData.userComment(); if (!comment.isEmpty()) { tg.indexXattrText(comment, QByteArray("C")); } return (!tags.isEmpty() || rating || !comment.isEmpty()); } QVector BasicIndexingJob::typesForMimeType(const QString& mimeType) { using namespace KFileMetaData; QVector types; // Basic types if (mimeType.startsWith(QLatin1String("audio/"))) types << Type::Audio; if (mimeType.startsWith(QLatin1String("video/"))) types << Type::Video; if (mimeType.startsWith(QLatin1String("image/"))) types << Type::Image; if (mimeType.startsWith(QLatin1String("text/"))) types << Type::Text; if (mimeType.contains(QLatin1String("document"))) types << Type::Document; if (mimeType.contains(QLatin1String("powerpoint"))) { types << Type::Presentation; types << Type::Document; } if (mimeType.contains(QLatin1String("excel"))) { types << Type::Spreadsheet; types << Type::Document; } static QMultiHash typeMapper = { {"text/plain", Type::Document}, // MS Office {"application/msword", Type::Document}, {"application/x-scribus", Type::Document}, {"application/vnd.ms-powerpoint", Type::Document}, {"application/vnd.ms-powerpoint", Type::Presentation}, {"application/vnd.ms-excel", Type::Document}, {"application/vnd.ms-excel", Type::Spreadsheet}, // Office 2007 {"application/vnd.openxmlformats-officedocument.wordprocessingml.document", Type::Document}, {"application/vnd.openxmlformats-officedocument.presentationml.presentation", Type::Document}, {"application/vnd.openxmlformats-officedocument.presentationml.presentation", Type::Presentation}, {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", Type::Document}, {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", Type::Spreadsheet}, // Open Document Formats - http://en.wikipedia.org/wiki/OpenDocument_technical_specification {"application/vnd.oasis.opendocument.text", Type::Document}, {"application/vnd.oasis.opendocument.presentation", Type::Document}, {"application/vnd.oasis.opendocument.presentation", 
Type::Presentation}, {"application/vnd.oasis.opendocument.spreadsheet", Type::Document}, {"application/vnd.oasis.opendocument.spreadsheet", Type::Spreadsheet}, {"application/pdf", Type::Document}, {"application/postscript", Type::Document}, {"application/x-dvi", Type::Document}, {"application/rtf", Type::Document}, // EBooks {"application/epub+zip", Type::Document}, {"application/x-mobipocket-ebook", Type::Document}, // Archives - http://en.wikipedia.org/wiki/List_of_archive_formats {"application/x-tar", Type::Archive}, {"application/x-bzip2", Type::Archive}, {"application/x-gzip", Type::Archive}, {"application/x-lzip", Type::Archive}, {"application/x-lzma", Type::Archive}, {"application/x-lzop", Type::Archive}, {"application/x-compress", Type::Archive}, {"application/x-7z-compressed", Type::Archive}, {"application/x-ace-compressed", Type::Archive}, {"application/x-astrotite-afa", Type::Archive}, {"application/x-alz-compressed", Type::Archive}, {"application/vnd.android.package-archive", Type::Archive}, {"application/x-arj", Type::Archive}, {"application/vnd.ms-cab-compressed", Type::Archive}, {"application/x-cfs-compressed", Type::Archive}, {"application/x-dar", Type::Archive}, {"application/x-lzh", Type::Archive}, {"application/x-lzx", Type::Archive}, {"application/x-rar-compressed", Type::Archive}, {"application/x-stuffit", Type::Archive}, {"application/x-stuffitx", Type::Archive}, {"application/x-gtar", Type::Archive}, {"application/zip", Type::Archive}, {"image/svg+xml", Type::Image}, // WPS office {"application/wps-office.doc", Type::Document}, {"application/wps-office.xls", Type::Document}, {"application/wps-office.xls", Type::Spreadsheet}, {"application/wps-office.pot", Type::Document}, {"application/wps-office.pot", Type::Presentation}, {"application/wps-office.wps", Type::Document}, {"application/wps-office.docx", Type::Document}, {"application/wps-office.xlsx", Type::Document}, {"application/wps-office.xlsx", Type::Spreadsheet}, 
{"application/wps-office.pptx", Type::Document}, {"application/wps-office.pptx", Type::Presentation}, // Other {"text/markdown", Type::Document}, {"image/vnd.djvu+multipage", Type::Document}, {"application/x-lyx", Type::Document} }; types << typeMapper.values(mimeType).toVector(); return types; } diff --git a/src/file/modifiedfileindexer.cpp b/src/file/modifiedfileindexer.cpp index f52300ef..48f251c7 100644 --- a/src/file/modifiedfileindexer.cpp +++ b/src/file/modifiedfileindexer.cpp @@ -1,104 +1,111 @@ /* * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "modifiedfileindexer.h" #include "basicindexingjob.h" #include "fileindexerconfig.h" #include "idutils.h" #include "database.h" #include "transaction.h" #include #include #include #include using namespace Baloo; ModifiedFileIndexer::ModifiedFileIndexer(Database* db, const FileIndexerConfig* config, const QStringList& files) : m_db(db) , m_config(config) , m_files(files) { Q_ASSERT(m_db); Q_ASSERT(m_config); Q_ASSERT(!m_files.isEmpty()); } void ModifiedFileIndexer::run() { QMimeDatabase mimeDb; Transaction tr(m_db, Transaction::ReadWrite); for (const QString& filePath : qAsConst(m_files)) { Q_ASSERT(!filePath.endsWith('/')); QString fileName = filePath.mid(filePath.lastIndexOf('/') + 1); if (!m_config->shouldFileBeIndexed(fileName)) { continue; } QString mimetype = mimeDb.mimeTypeForFile(filePath, QMimeDatabase::MatchExtension).name(); if (!m_config->shouldMimeTypeBeIndexed(mimetype)) { continue; } quint64 fileId = filePathToId(QFile::encodeName(filePath)); if (!fileId) { continue; } - quint32 mTime = tr.documentTimeInfo(fileId).mTime; + DocumentTimeDB::TimeInfo timeInfo = tr.documentTimeInfo(fileId); // A folders mtime is updated when a new file is added / removed / renamed // we don't really need to reindex a folder when that happens // In fact, we never need to reindex a folder - if (mTime && mimetype == QLatin1String("inode/directory")) { + if (timeInfo.mTime && mimetype == QLatin1String("inode/directory")) { continue; } // FIXME: Using QFileInfo over here is quite expensive! 
QFileInfo fileInfo(filePath); - if (mTime == fileInfo.lastModified().toTime_t()) { + bool mTimeChanged = timeInfo.mTime != fileInfo.lastModified().toTime_t(); +#if QT_VERSION >= QT_VERSION_CHECK(5,10,0) + bool cTimeChanged = timeInfo.cTime != fileInfo.metadataChangeTime().toTime_t(); +#else + bool cTimeChanged = timeInfo.cTime != fileInfo.created().toTime_t(); +#endif + + if (!mTimeChanged && !cTimeChanged) { continue; } // FIXME: The BasicIndexingJob extracts too much info. We only need the time BasicIndexingJob::IndexingLevel level = m_config->onlyBasicIndexing() ? BasicIndexingJob::NoLevel : BasicIndexingJob::MarkForContentIndexing; BasicIndexingJob job(filePath, mimetype, level); if (!job.index()) { continue; } // we can get modified events for files which do not exist // cause Baloo was not running and missed those events if (tr.hasDocument(job.document().id())) { tr.replaceDocument(job.document(), DocumentTime); } else { tr.addDocument(job.document()); } } tr.commit(); Q_EMIT done(); } diff --git a/src/file/unindexedfileiterator.cpp b/src/file/unindexedfileiterator.cpp index 1d402204..354b5087 100644 --- a/src/file/unindexedfileiterator.cpp +++ b/src/file/unindexedfileiterator.cpp @@ -1,125 +1,134 @@ /* * This file is part of the KDE Baloo project. * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "unindexedfileiterator.h" #include "fileindexerconfig.h" #include "idutils.h" #include "transaction.h" +#include "baloodebug.h" #include #include using namespace Baloo; UnIndexedFileIterator::UnIndexedFileIterator(const FileIndexerConfig* config, Transaction* transaction, const QString& folder) : m_config(config) , m_transaction(transaction) , m_iter(config, folder, FilteredDirIterator::FilesAndDirs) , m_mTimeChanged(false) , m_cTimeChanged(false) { } UnIndexedFileIterator::~UnIndexedFileIterator() { } QString UnIndexedFileIterator::filePath() const { return m_iter.filePath(); } QString UnIndexedFileIterator::mimetype() const { return m_mimetype; } bool UnIndexedFileIterator::mTimeChanged() const { return m_mTimeChanged; } bool UnIndexedFileIterator::cTimeChanged() const { return m_cTimeChanged; } QString UnIndexedFileIterator::next() { while (1) { const QString filePath = m_iter.next(); m_mTimeChanged = false; m_cTimeChanged = false; if (filePath.isEmpty()) { m_mimetype.clear(); return QString(); } // This mimetype may not be completely accurate, but that's okay. This is // just the initial phase of indexing. The second phase can try to find // a more accurate mimetype. 
m_mimetype = m_mimeDb.mimeTypeForFile(filePath, QMimeDatabase::MatchExtension).name(); if (shouldIndex(filePath, m_mimetype)) { return filePath; } } } bool UnIndexedFileIterator::shouldIndex(const QString& filePath, const QString& mimetype) { bool shouldIndexType = m_config->shouldMimeTypeBeIndexed(mimetype); if (!shouldIndexType) return false; const QFileInfo fileInfo = m_iter.fileInfo(); if (!fileInfo.exists()) return false; quint64 fileId = filePathToId(QFile::encodeName(filePath)); Q_ASSERT_X(fileId, "UnIndexedFileIterator::shouldIndex", "file id is 0"); if (!fileId) { return true; } DocumentTimeDB::TimeInfo timeInfo = m_transaction->documentTimeInfo(fileId); // A folders mtime is updated when a new file is added / removed / renamed // we don't really need to reindex a folder when that happens // In fact, we never need to reindex a folder if (timeInfo.mTime && mimetype == QLatin1String("inode/directory")) { return false; } if (timeInfo.mTime != fileInfo.lastModified().toTime_t()) { m_mTimeChanged = true; } - if (timeInfo.cTime != fileInfo.created().toTime_t()) { +#if QT_VERSION >= QT_VERSION_CHECK(5,10,0) + auto fileCTime = fileInfo.metadataChangeTime().toTime_t(); /* on-disk value compared against the stored cTime */ +#else + auto fileCTime = fileInfo.created().toTime_t(); /* pre-5.10 stand-in; NOTE(review): presumably the status-change time on Unix - confirm */ +#endif + if (timeInfo.cTime != fileCTime) { m_cTimeChanged = true; } if (m_mTimeChanged || m_cTimeChanged) { + qCDebug(BALOO) << "mtime/ctime changed:" + << timeInfo.mTime << fileInfo.lastModified().toTime_t() + << timeInfo.cTime << fileCTime; return true; } return false; }