diff --git a/src/file/fileindexerconfig.cpp b/src/file/fileindexerconfig.cpp index 098f93eb..d29c6676 100644 --- a/src/file/fileindexerconfig.cpp +++ b/src/file/fileindexerconfig.cpp @@ -1,384 +1,416 @@ /* This file is part of the KDE Project Copyright (c) 2008-2010 Sebastian Trueg Copyright (c) 2013-2014 Vishesh Handa Copyright (c) 2020 Benjamin Port This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "fileindexerconfig.h" #include "fileexcludefilters.h" #include "storagedevices.h" +#include "baloodebug.h" #include #include #include #include "baloosettings.h" namespace { /// recursively check if a folder is hidden bool isDirHidden(const QDir& dir) { #ifdef __unix__ return dir.absolutePath().contains(QLatin1String("/.")); #else QDir d = dir; do { if (QFileInfo(d.path()).isHidden()) return true; } while (d.cdUp()); return false; #endif } QString normalizeTrailingSlashes(QString&& path) { while (path.endsWith(QLatin1Char('/'))) { path.chop(1); } path += QLatin1Char('/'); return path; } } -using namespace Baloo; +namespace Baloo +{ FileIndexerConfig::FileIndexerConfig(QObject* parent) : QObject(parent) , m_settings(new BalooSettings(this)) , m_folderCacheDirty(true) , m_indexHidden(false) , m_devices(nullptr) , m_maxUncomittedFiles(40) { forceConfigUpdate(); } FileIndexerConfig::~FileIndexerConfig() { } +QDebug operator<<(QDebug dbg, const FileIndexerConfig::FolderConfig& entry) +{ + QDebugStateSaver saver(dbg); + dbg.nospace() << entry.path << ": " + << (entry.isIncluded ? 
"included" : "excluded"); + return dbg; +} QStringList FileIndexerConfig::includeFolders() const { const_cast(this)->buildFolderCache(); QStringList fl; - for (int i = 0; i < m_folderCache.count(); ++i) { - if (m_folderCache[i].second) - fl << m_folderCache[i].first; + for (const auto& entry : m_folderCache) { + if (entry.isIncluded) + fl << entry.path; } return fl; } - QStringList FileIndexerConfig::excludeFolders() const { const_cast(this)->buildFolderCache(); QStringList fl; - for (int i = 0; i < m_folderCache.count(); ++i) { - if (!m_folderCache[i].second) - fl << m_folderCache[i].first; + for (const auto& entry : m_folderCache) { + if (!entry.isIncluded) + fl << entry.path; } return fl; } - QStringList FileIndexerConfig::excludeFilters() const { // read configured exclude filters QStringList filters = m_settings->excludedFilters(); // make sure we always keep the latest default exclude filters // TODO: there is one problem here. What if the user removed some of the default filters? 
if (m_settings->excludedFiltersVersion() < defaultExcludeFilterListVersion()) { filters += defaultExcludeFilterList(); // in case the cfg entry was empty and filters == defaultExcludeFilterList() filters.removeDuplicates(); // write the config directly since the KCM does not have support for the version yet m_settings->setExcludedFilters(filters); m_settings->setExcludedFiltersVersion(defaultExcludeFilterListVersion()); } return filters; } QStringList FileIndexerConfig::excludeMimetypes() const { #if QT_VERSION < QT_VERSION_CHECK(5, 14, 0) return QStringList::fromSet(m_excludeMimetypes); #else return QList(m_excludeMimetypes.begin(), m_excludeMimetypes.end()); #endif } bool FileIndexerConfig::indexHiddenFilesAndFolders() const { return m_indexHidden; } bool FileIndexerConfig::onlyBasicIndexing() const { return m_onlyBasicIndexing; } bool FileIndexerConfig::canBeSearched(const QString& folder) const { QFileInfo fi(folder); QString path = fi.absolutePath(); if (!fi.isDir()) { return false; } else if (shouldFolderBeIndexed(path)) { return true; } const_cast(this)->buildFolderCache(); // Look for included descendants - for (const QPair& fld: qAsConst(m_folderCache)) { - if (fld.second && fld.first.startsWith(path)) { + for (const auto& entry : m_folderCache) { + if (entry.isIncluded && entry.path.startsWith(path)) { return true; } } return false; } bool FileIndexerConfig::shouldBeIndexed(const QString& path) const { QFileInfo fi(path); if (fi.isDir()) { return shouldFolderBeIndexed(path); } else { return (shouldFolderBeIndexed(fi.absolutePath()) && (!fi.isHidden() || indexHiddenFilesAndFolders()) && shouldFileBeIndexed(fi.fileName())); } } bool FileIndexerConfig::shouldFolderBeIndexed(const QString& path) const { QString folder; if (folderInFolderList(path, folder)) { // we always index the folders in the list // ignoring the name filters if (folder == normalizeTrailingSlashes(QString(path))) return true; // check for hidden folders QDir dir(path); if 
(!indexHiddenFilesAndFolders() && isDirHidden(dir)) return false; // check the exclude filters for all components of the path // after folder const QStringList pathComponents = path.mid(folder.count()).split(QLatin1Char('/'), QString::SkipEmptyParts); for (const QString& c : pathComponents) { if (!shouldFileBeIndexed(c)) { return false; } } return true; } else { return false; } } bool FileIndexerConfig::shouldFileBeIndexed(const QString& fileName) const { if (!indexHiddenFilesAndFolders() && fileName.startsWith(QLatin1Char('.'))) { return false; } return !m_excludeFilterRegExpCache.exactMatch(fileName); } bool FileIndexerConfig::shouldMimeTypeBeIndexed(const QString& mimeType) const { return !m_excludeMimetypes.contains(mimeType); } bool FileIndexerConfig::folderInFolderList(const QString& path) { QString str; return folderInFolderList(path, str); } bool FileIndexerConfig::folderInFolderList(const QString& path, QString& folder) const { const_cast(this)->buildFolderCache(); const QString p = normalizeTrailingSlashes(QString(path)); - // we traverse the list backwards to catch all exclude folders - int i = m_folderCache.count(); - while (--i >= 0) { - const QString& f = m_folderCache[i].first; - const bool include = m_folderCache[i].second; + for (const auto& entry : m_folderCache) { + const QString& f = entry.path; if (p.startsWith(f)) { folder = f; - return include; + return entry.isIncluded; } } // path is not in the list, thus it should not be included folder.clear(); return false; } -namespace -{ -/** - * Returns true if the specified folder f would already be excluded using the list - * folders. - */ -bool alreadyExcluded(const QList >& folders, const QString& f) +void FileIndexerConfig::FolderCache::cleanup() { - bool included = false; - for (int i = 0; i < folders.count(); ++i) { - QString path = folders[i].first; + // TODO There are two cases where "redundant" includes + // should be kept: + // 1. 
when the "tail" matches a path exclude filter + // (m_excludeFilterRegExpCache) + // 2. when the user explicitly adds a hidden directory, and + // we want to index hidden dirs (m_indexHidden) + bool keepAllIncluded = true; + + auto entry = begin(); + while (entry != end()) { + if ((*entry).isIncluded && keepAllIncluded) { + ++entry; + continue; + } - if (f != folders[i].first && f.startsWith(path)) { - included = folders[i].second; + const QString entryPath = (*entry).path; + auto start = entry; ++start; + auto parent = std::find_if(start, end(), + [&entryPath](const FolderConfig& _parent) { + return entryPath.startsWith(_parent.path); + }); + + if (parent != end()) { + if ((*entry).isIncluded == (*parent).isIncluded) { + // remove identical config + entry = erase(entry); + } else { + ++entry; + } + } else { + if (!(*entry).isIncluded) { + // remove excluded entry at topmost level (default) + entry = erase(entry); + } else { + ++entry; + } } } - return !included; } -/** - * Simple insertion sort - */ -void insertSortFolders(const QStringList& folders, bool include, QList >& result) +bool FileIndexerConfig::FolderConfig::operator<(const FolderConfig& other) const { - for (QString path : folders) { - int pos = 0; - path = normalizeTrailingSlashes(std::move(path)); - while (result.count() > pos && - result[pos].first < path) - ++pos; - result.insert(pos, qMakePair(path, include)); - } + return path.size() > other.path.size() || + (path.size() == other.path.size() && path < other.path); } -/** - * Remove useless exclude entries which would confuse the folderInFolderList algo. - * This makes sure all top-level items are include folders. - * This runs in O(n^2) and could be optimized but what for. 
- */ -void cleanupList(QList >& result) +bool FileIndexerConfig::FolderCache::addFolderConfig(const FolderConfig& config) { - int i = 0; - while (i < result.count()) { - if (result[i].first.isEmpty() || - (!result[i].second && - alreadyExcluded(result, result[i].first))) - result.removeAt(i); - else - ++i; + if (config.path.isEmpty()) { + qCDebug(BALOO) << "Trying to add folder config entry with empty path"; + return false; } -} + auto newConfig{config}; + newConfig.path = normalizeTrailingSlashes(std::move(newConfig.path)); + + auto it = std::lower_bound(cbegin(), cend(), newConfig); + if (it != cend() && (*it).path == newConfig.path) { + qCDebug(BALOO) << "Folder config entry for" << newConfig.path << "already exists"; + return false; + } + + it = insert(it, newConfig); + return true; } void FileIndexerConfig::buildFolderCache() { if (!m_folderCacheDirty) { return; } if (!m_devices) { m_devices = new StorageDevices(this); } - QStringList includeFoldersPlain = m_settings->folders(); - QStringList excludeFoldersPlain = m_settings->excludedFolders(); + FolderCache cache; + + const QStringList includeFolders = m_settings->folders(); + for (const auto& folder : includeFolders) { + if (!cache.addFolderConfig({folder, true})) { + qCWarning(BALOO) << "Failed to add include folder config entry for" << folder; + } + } + + const QStringList excludeFolders = m_settings->excludedFolders(); + for (const auto& folder : excludeFolders) { + if (!cache.addFolderConfig({folder, false})) { + qCWarning(BALOO) << "Failed to add exclude folder config entry for" << folder; + } + } + // Add all removable media and network shares as ignored unless they have // been explicitly added in the include list const auto allMedia = m_devices->allMedia(); for (const auto& device: allMedia) { const QString mountPath = device.mountPath(); if (!device.isUsable() && !mountPath.isEmpty()) { - if (!includeFoldersPlain.contains(mountPath)) { - excludeFoldersPlain << mountPath; + if 
(!includeFolders.contains(mountPath)) { + cache.addFolderConfig({mountPath, false}); } } } - m_folderCache.clear(); - insertSortFolders(includeFoldersPlain, true, m_folderCache); - insertSortFolders(excludeFoldersPlain, false, m_folderCache); - - cleanupList(m_folderCache); + cache.cleanup(); + qCDebug(BALOO) << "Folder cache:" << cache; + m_folderCache = cache; m_folderCacheDirty = false; } void FileIndexerConfig::buildExcludeFilterRegExpCache() { QStringList newFilters = excludeFilters(); m_excludeFilterRegExpCache.rebuildCacheFromFilterList(newFilters); } void FileIndexerConfig::buildMimeTypeCache() { const QStringList excludedTypes = m_settings->excludedMimetypes(); #if QT_VERSION < QT_VERSION_CHECK(5, 14, 0) m_excludeMimetypes = excludedTypes.toSet(); #else m_excludeMimetypes = QSet(excludedTypes.begin(), excludedTypes.end()); #endif } void FileIndexerConfig::forceConfigUpdate() { m_settings->load(); m_folderCacheDirty = true; buildExcludeFilterRegExpCache(); buildMimeTypeCache(); m_indexHidden = m_settings->indexHiddenFolders(); m_onlyBasicIndexing = m_settings->onlyBasicIndexing(); } int FileIndexerConfig::databaseVersion() const { return m_settings->dbVersion(); } void FileIndexerConfig::setDatabaseVersion(int version) { m_settings->setDbVersion(version); m_settings->save(); } bool FileIndexerConfig::indexingEnabled() const { return m_settings->indexingEnabled(); } uint FileIndexerConfig::maxUncomittedFiles() const { return m_maxUncomittedFiles; } + +} // namespace Baloo diff --git a/src/file/fileindexerconfig.h b/src/file/fileindexerconfig.h index 727b01e6..de197e1e 100644 --- a/src/file/fileindexerconfig.h +++ b/src/file/fileindexerconfig.h @@ -1,193 +1,216 @@ /* Copyright (c) 2008-2009 Sebastian Trueg Copyright (c) 2012-2014 Vishesh Handa Copyright (c) 2020 Benjamin Port This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. 
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef BALOO_FILEINDEXER_CONFIG_H_ #define BALOO_FILEINDEXER_CONFIG_H_ #include #include #include +#include #include "regexpcache.h" class BalooSettings; namespace Baloo { class StorageDevices; /** * Active config class which emits signals if the config * was changed, for example if the KCM saved the config file. */ class FileIndexerConfig : public QObject { Q_OBJECT public: explicit FileIndexerConfig(QObject* parent = nullptr); ~FileIndexerConfig(); /** * Folders to search for files to index and analyze. * \return list of paths. */ QStringList includeFolders() const; /** * Folders that are excluded from indexing. - * (Descendant folders of an excluded folder can be added + * (Descendant folders of an excluded folder can be added * and they will be indexed.) * \return list of paths. */ QStringList excludeFolders() const; QStringList excludeFilters() const; QStringList excludeMimetypes() const; bool indexHiddenFilesAndFolders() const; bool onlyBasicIndexing() const; /** * Check if \p folder can be searched. * \p folder can be searched if itself or one of its descendants is indexed. - * + * * Example: * if ~/foo is not indexed and ~/foo/bar is indexed * then ~/foo can be searched. * * \return \c true if the \p folder can be searched. */ bool canBeSearched(const QString& folder) const; /** * Check if file or folder \p path should be indexed taking into account * the includeFolders(), the excludeFolders(), and the excludeFilters(). * Inclusion takes precedence. 
* * Be aware that this method does not check if parent dirs * match any of the exclude filters. Only the path of the * dir itself it checked. * * \return \c true if the file or folder at \p path should * be indexed according to the configuration. */ bool shouldBeIndexed(const QString& path) const; /** * Check if the folder at \p path should be indexed. * * Be aware that this method does not check if parent dirs * match any of the exclude filters. Only the name of the * dir itself it checked. * * \return \c true if the folder at \p path should * be indexed according to the configuration. */ bool shouldFolderBeIndexed(const QString& path) const; /** * Check \p fileName for all exclude filters. This does * not take file paths into account. * * \return \c true if a file with name \p filename should * be indexed according to the configuration. */ bool shouldFileBeIndexed(const QString& fileName) const; /** * Checks if \p mimeType should be indexed * * \return \c true if the mimetype should be indexed according * to the configuration */ bool shouldMimeTypeBeIndexed(const QString& mimeType) const; /** * Returns true if the folder is in the list indexed directories * and not in the list of exclude directories */ bool folderInFolderList(const QString& path); /** * Check if \p path is in the list of folders to be indexed taking * include and exclude folders into account. * \p folder is set to the folder which was the reason for the decision. */ bool folderInFolderList(const QString& path, QString& folder) const; /** * Returns the internal version number of the Baloo database */ int databaseVersion() const; void setDatabaseVersion(int version); bool indexingEnabled() const; /** * Returns batch size */ uint maxUncomittedFiles() const; public Q_SLOTS: /** * Reread the config from disk and update the configuration cache. * This is only required for testing as normally the config updates * itself whenever the config file on disk changes. 
* * \return \c true if the config has actually changed */ void forceConfigUpdate(); private: void buildFolderCache(); void buildExcludeFilterRegExpCache(); void buildMimeTypeCache(); BalooSettings *m_settings; - /// Caching cleaned up list (no duplicates, no useless entries, etc.) - QList > m_folderCache; + struct FolderConfig + { + QString path; + bool isIncluded; + + /// Sort by path length, and on ties lexicographically. + /// Longest path first + bool operator<(const FolderConfig& other) const; + }; + + class FolderCache : public std::vector + { + public: + void cleanup(); + + bool addFolderConfig(const FolderConfig&); + bool updateFolderConfig(const FolderConfig&); + }; + friend QDebug operator<<(QDebug dbg, const FolderConfig& config); + + /// Caching cleaned up list (no duplicates, no redundant exclude entries, etc.) + FolderCache m_folderCache; /// Whether the folder cache needs to be rebuilt the next time it is used bool m_folderCacheDirty; /// cache of regexp objects for all exclude filters /// to prevent regexp parsing over and over RegExpCache m_excludeFilterRegExpCache; /// A set of mimetypes which should never be indexed QSet m_excludeMimetypes; bool m_indexHidden; bool m_onlyBasicIndexing; StorageDevices* m_devices; const uint m_maxUncomittedFiles; }; +QDebug operator<<(QDebug dbg, const FileIndexerConfig::FolderCache::value_type&); + } #endif