diff --git a/src/engine/experimental/databasesanitizer.cpp b/src/engine/experimental/databasesanitizer.cpp index de629ede..a5d67059 100644 --- a/src/engine/experimental/databasesanitizer.cpp +++ b/src/engine/experimental/databasesanitizer.cpp @@ -1,380 +1,380 @@ /* * This file is part of the KDE Baloo project. * Copyright 2018 Michael Heidelbach * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License or (at your option) version 3 or any later version * accepted by the membership of KDE e.V. (or its successor approved * by the membership of KDE e.V.), which shall act as a proxy * defined in Section 14 of version 3 of the license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include "databasesanitizer.h" #include "documenturldb.h" #include "idutils.h" #include #include #include #include #include namespace Baloo { class DatabaseSanitizerImpl { public: DatabaseSanitizerImpl(const Database& db, Transaction::TransactionType type) : m_transaction(new Transaction(db, type)) { } public: /** * \brief Basic info about database items */ struct FileInfo { quint32 deviceId = 0; quint32 inode = 0; quint64 id = 0; bool isSymLink = false; bool accessible = true; QString url; }; /** * \brief Write the percentage 100 * cur / max to \p out whenever \p cur is a multiple of \p step, then increment \p cur. */ void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const { if (cur % step == 0) { out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16); out.flush(); } cur++; } /** * Summary of createList() actions */ struct Summary { quint64 total = 0; ///Count of all files quint64 ignored = 0; ///Count of filtered out files quint64 accessible = 0; ///Count of checked and accessible files }; /** * Create a list of \a FileInfo items. * * \p deviceIds Filter by device ids. If the vector is empty no filtering is done * and every item is collected. * Positive numbers are including filters collecting only the mentioned device ids. * Negative numbers are excluding filters collecting everything but the mentioned device ids. * * \p accessFilter Flags to filter items by accessibility. * * \p urlFilter Filter result urls. Default is null = Collect everything. 
*/ QPair, Summary> createList( const QVector& deviceIds, const DatabaseSanitizer::ItemAccessFilters accessFilter, const QSharedPointer& urlFilter ) const { Q_ASSERT(m_transaction); const auto docUrlDb = DocumentUrlDB(m_transaction->m_dbis.idTreeDbi, m_transaction->m_dbis.idFilenameDbi, m_transaction->m_txn); const auto map = docUrlDb.toTestMap(); const auto keys = map.keys(); QVector result; uint max = map.size(); uint i = 0; result.reserve(max); QVector includeIds; QVector excludeIds; for (qint64 deviceId : deviceIds) { if (deviceId > 0) { includeIds.append(deviceId); } else if (deviceId < 0) { excludeIds.append(-deviceId); } } Summary summary; summary.total = max; summary.ignored = max; QTextStream err(stderr); for (auto it = map.constBegin(), end = map.constEnd(); it != end; it++) { printProgress(err, i, max, 100); const quint64 id = it.key(); const quint32 deviceId = idToDeviceId(id); if (!includeIds.isEmpty() && !includeIds.contains(deviceId)) { continue; } else if (excludeIds.contains(deviceId)) { continue; } else if (urlFilter && !urlFilter->match(it.value()).hasMatch()) { continue; } FileInfo info; info.deviceId = deviceId; info.inode = idToInode(id); info.url = QFile::decodeName(it.value()); info.id = id; QFileInfo fileInfo(info.url); info.accessible = !info.url.isEmpty() && fileInfo.exists(); if (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) { continue; } else if (!info.accessible && (accessFilter & DatabaseSanitizer::IgnoreUnavailable)) { continue; } info.isSymLink = fileInfo.isSymLink(); result.append(info); summary.ignored--; if (info.accessible) { summary.accessible++; } } return {result, summary}; } /** * \brief Look up the mounted volume recorded for device \p id. * * The device-id-to-volume map is built once, on the first call, from QStorageInfo::mountedVolumes(). * If \p id has no entry, the map's default value is returned. */ QStorageInfo getStorageInfo(const quint32 id) { static QMap storageInfos = []() { QMap result; const auto volumes = QStorageInfo::mountedVolumes(); for (const auto& vol : volumes) { const QByteArray rootPath = QFile::encodeName(vol.rootPath()); - const auto fsinfo = filePathToStat(rootPath); - const quint32 id = 
static_cast(fsinfo.st_dev); + const auto id = filePathToId(rootPath); + const quint32 deviceId = idToDeviceId(id); // qDebug() << vol; - result[id] = vol; + result[deviceId] = vol; } return result; }(); QStorageInfo info = storageInfos.value(id); return info; } /** * \brief Decide for every device id occurring in \p infos whether it is ignored. * * \return Map from device id to the result of isIgnored() for that device's storage info and \p accessFilter. */ QMap deviceFilters(QVector& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter) { QMap result; for (const auto& info : infos) { result[info.deviceId] = false; } for (auto it = result.begin(), end = result.end(); it != end; it++) { const auto storageInfo = getStorageInfo(it.key()); it.value() = isIgnored(storageInfo, accessFilter); } return result; } /** * \brief Tell whether a device is filtered out by \p accessFilter. * * A valid \p storageInfo counts as mounted. Mounted devices are ignored with IgnoreMounted, * unmounted ones with IgnoreUnmounted; devices of file system type tmpfs are always ignored. */ bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter) { const bool mounted = storageInfo.isValid(); if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) { return true; } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) { return true; } if (storageInfo.fileSystemType() == QLatin1String("tmpfs")) { // Due to the volatility of device ids, an id known by baloo may // appear as mounted, but is not what baloo expects. // For example at indexing time 43 was the id of a smb share, but // at runtime 43 is the id of /run/media/ when other users are // logged in. The latter have a type of 'tmpfs' and should be ignored. 
return true; } return false; } void removeDocument(const quint64 id) { m_transaction->removeDocument(id); } void commit() { m_transaction->commit(); } void abort() { m_transaction->abort(); } private: Transaction* m_transaction; }; } using namespace Baloo; DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type) : m_pimpl(new DatabaseSanitizerImpl(db, type)) { } DatabaseSanitizer::DatabaseSanitizer(Database* db, Transaction::TransactionType type) : DatabaseSanitizer(*db, type) { } DatabaseSanitizer::~DatabaseSanitizer() { delete m_pimpl; m_pimpl = nullptr; } /** * Create a list of \a FileInfo items and print it to stdout. * * \p deviceIds Filter by device ids. If the vector is empty no filtering is done * and everything is printed. * Positive numbers are including filters printing only the mentioned device ids. * Negative numbers are excluding filters printing everything but the mentioned device ids. * * \p accessFilter Flags to filter items by accessibility. With IgnoreAvailable only * inaccessible items are printed, which simulates a purging operation. * * \p urlFilter Filter result urls. Default is null = Print everything. */ void DatabaseSanitizer::printList( const QVector& deviceIds, const ItemAccessFilters accessFilter, const QSharedPointer& urlFilter) { auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter); const auto sep = QLatin1Char(' '); QTextStream out(stdout); QTextStream err(stderr); for (const auto& info: listResult.first) { out << QStringLiteral("%1").arg(info.accessible ? 
"+" : "!") << sep << QStringLiteral("device: %1").arg(info.deviceId) << sep << QStringLiteral("inode: %1").arg(info.inode) << sep << QStringLiteral("url: %1").arg(info.url) << endl; } const auto& summary = listResult.second; if (accessFilter & IgnoreAvailable) { err << i18n("Total: %1, Inaccessible: %2", summary.total, summary.total - (summary.ignored + summary.accessible)) << endl; } else { err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4", summary.total, summary.ignored, summary.accessible, summary.total - (summary.ignored + summary.accessible)) << endl; } } /** * Print one line per device id found among the listed items to stdout: the device id, * its major and minor numbers, the count of indexed items and, for mounted devices, * file system type, device and root path. * * Devices of type tmpfs and devices excluded by \p accessFilter (IgnoreMounted, IgnoreUnmounted) * are skipped. The number of matching devices is written to stderr. */ void DatabaseSanitizer::printDevices(const QVector& deviceIds, const ItemAccessFilters accessFilter) { auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr); QMap useCount; for (const auto& info : infos.first) { useCount[info.deviceId]++; } const auto sep = QLatin1Char(' '); QTextStream out(stdout); QTextStream err(stderr); int matchCount = 0; for (auto it = useCount.cbegin(); it != useCount.cend(); it++) { auto id = it.key(); auto info = m_pimpl->getStorageInfo(id); auto mounted = info.isValid(); if (info.fileSystemType() == QLatin1String("tmpfs")) { continue; } else if (mounted && (accessFilter & IgnoreMounted)) { continue; } else if (!mounted && (accessFilter & IgnoreUnmounted)) { continue; } matchCount++; // TODO coloring would be nice, but "...|grep '^!'" does not work with it. // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!") // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263 out << QStringLiteral("%1").arg(mounted ? 
"+" : "!") << sep << QStringLiteral("device:%1").arg(id) << sep << QStringLiteral("[%1:%2]") .arg(major(id), 4, 16, QLatin1Char('0')) .arg(minor(id), 4, 16, QLatin1Char('0')) << sep << QStringLiteral("indexed-items:%1").arg(it.value()); if (mounted) { out << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData()) << sep << QStringLiteral("device:%1").arg(info.device().constData()) << sep << QStringLiteral("path:%1").arg(info.rootPath()) ; } // TODO: see above // out << QStringLiteral("\033[0m") << endl; out << endl; } err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl; } /** * Remove the entries of inaccessible files from the database. * * Only inaccessible items are collected (the list is created with IgnoreAvailable). * Items on devices ignored by \p accessFilter and symbolic links are kept and counted as ignored; * every other item is removed. With \p dryRun the transaction is aborted instead of committed. */ void DatabaseSanitizer::removeStaleEntries(const QVector& deviceIds, const DatabaseSanitizer::ItemAccessFilters accessFilter, const bool dryRun, const QSharedPointer& urlFilter) { auto listResult = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter); const auto ignoredDevices = m_pimpl->deviceFilters(listResult.first, accessFilter); const auto sep = QLatin1Char(' '); auto& summary = listResult.second; QTextStream out(stdout); QTextStream err(stderr); for (const auto& info: listResult.first) { if (ignoredDevices[info.deviceId] == true) { summary.ignored++; } else { if (info.isSymLink) { out << i18n("IgnoredSymbolicLink:"); summary.ignored++; } else { m_pimpl->removeDocument(info.id); out << i18n("Removing:"); } out << sep << QStringLiteral("device: %1").arg(info.deviceId) << sep << QStringLiteral("inode: %1").arg(info.inode) << sep << QStringLiteral("url: %1").arg(info.url) << endl; } } if (dryRun) { m_pimpl->abort(); } else { m_pimpl->commit(); } Q_ASSERT(summary.accessible == 0); err << i18nc("numbers", "Removed: %1, Total: %2, Ignored: %3", summary.total - summary.ignored, summary.total, summary.ignored) << endl; } diff --git a/src/engine/idutils.h b/src/engine/idutils.h index 6b3a0fc3..9a7e6ad1 100644 --- a/src/engine/idutils.h +++ b/src/engine/idutils.h @@ -1,127 +1,118 @@ /* * This file is part of the KDE Baloo 
Project * Copyright (C) 2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) version 3, or any * later version accepted by the membership of KDE e.V. (or its * successor approved by the membership of KDE e.V.), which shall * act as a proxy defined in Section 6 of version 3 of the license. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #ifndef BALOO_ID_UTILS_ #define BALOO_ID_UTILS_ #include #include #ifdef Q_OS_WIN # include #endif namespace Baloo { inline quint64 devIdAndInodeToId(quint32 devId, quint32 inode) { quint32 arr[2]; arr[0] = devId; arr[1] = inode; return *(reinterpret_cast(arr)); } /** * Convert the QT_STATBUF into a 64 bit unique identifier for the file. * This identifier is combination of the device id and inode number. 
*/ inline quint64 statBufToId(const QT_STATBUF& stBuf) { // We're losing 32 bits of info, so this could potentially break // on file systems with really large inode and device ids return devIdAndInodeToId(static_cast(stBuf.st_dev), static_cast(stBuf.st_ino)); } inline int filePathToStat(const QByteArray& filePath, QT_STATBUF& statBuf) { #ifndef Q_OS_WIN return QT_LSTAT(filePath.constData(), &statBuf); #else const int ret = QT_STAT(filePath.constData(), &statBuf); if (ret == 0 && QFileInfo(filePath).isSymLink()) { return QT_STAT(QFileInfo(filePath).symLinkTarget().toUtf8().constData(), &statBuf); } else { return ret; } #endif } -inline QT_STATBUF filePathToStat(const QByteArray& filePath) -{ - QT_STATBUF statBuf; - const int ret = filePathToStat(filePath, statBuf); - //TODO: if (ret != 0) qDebug() ? - Q_UNUSED(ret) - return statBuf; -} - inline quint64 filePathToId(const QByteArray& filePath) { QT_STATBUF statBuf; const int ret = filePathToStat(filePath, statBuf); return ret ? 0 : statBufToId(statBuf); } inline quint32 idToInode(quint64 id) { quint32* arr = reinterpret_cast(&id); return arr[1]; } inline quint32 idToDeviceId(quint64 id) { quint32* arr = reinterpret_cast(&id); return arr[0]; } template inline void sortedIdInsert(T& vec, const V& id) { /** * search with normal < */ const auto i(std::lower_bound(vec.begin(), vec.end(), id)); /** * end reached or element found smaller? * => insert new element! */ if (i == vec.end() || (id != *i)) vec.insert(i, id); } template inline void sortedIdRemove(T& vec, const V& id) { const int idx = vec.indexOf(id); if (idx >= 0) { vec.remove(idx); } } } #endif