diff --git a/src/engine/experimental/databasesanitizer.h b/src/engine/experimental/databasesanitizer.h
--- a/src/engine/experimental/databasesanitizer.h
+++ b/src/engine/experimental/databasesanitizer.h
@@ -40,6 +40,8 @@
         IgnoreUnavailable = 2,
         IgnoreMounted = 8,
         IgnoreUnmounted = 0x10,
+        FileMask = IgnoreAvailable | IgnoreUnavailable,
+        DeviceMask = IgnoreMounted | IgnoreUnmounted,
     };
     Q_DECLARE_FLAGS(ItemAccessFilters, ItemAccessFilterFlags)
 
diff --git a/src/engine/experimental/databasesanitizer.cpp b/src/engine/experimental/databasesanitizer.cpp
--- a/src/engine/experimental/databasesanitizer.cpp
+++ b/src/engine/experimental/databasesanitizer.cpp
@@ -169,44 +169,87 @@
         return info;
     }
 
+    /**
+     * \return True if device is mounted and not obscured
+     */
+    bool isMounted(const quint32 id) {
+        const QStorageInfo& info = getStorageInfo(id);
+        return info.isValid() && !isObscured(info);
+    }
+
+    /**
+     * Due to the volatility of device ids, an id known by baloo may
+     * appear as mounted, but is not what baloo expects.
+     * For example at indexing time 43 was the id of a smb share, but
+     * at runtime 43 is the id of /run/media/ when other users are
+     * logged in. The latter have a type of 'tmpfs' and should be ignored.
+     * \return Returns true when the device should be ignored
+     */
+    bool isObscured(const QStorageInfo& info)
+    {
+        return info.fileSystemType() == QLatin1String("tmpfs");
+    }
+
+    /**
+     * Removes all entries of \p infos that live on a device ignored by \p accessFilter.
+     * \return Summary: "ignored" is the number of removed entries, "accessible"
+     * the number of removed entries that were flagged as accessible
+     */
+    Summary removeIgnored(QVector& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter)
+    {
+        const auto ignoredDevices = deviceFilters(infos, accessFilter);
+        auto tail = infos.end();
+        Summary summary;
+        for (const quint32 deviceId : ignoredDevices) {
+            tail = std::remove_if(infos.begin(), tail,
+                [deviceId, &summary] (const FileInfo& info) {
+                    if (deviceId != info.deviceId) {
+                        return false;
+                    }
+                    summary.accessible += info.accessible ? 1 : 0;
+                    return true;
+                });
+        }
+        summary.ignored = infos.end() - tail;
+        infos.erase(tail, infos.end());
+        return summary;
+    }
+
+    /**
+     * \return Ids of all devices referenced by \p infos that are ignored by \p accessFilter
+     */
-    QMap deviceFilters(QVector& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter)
+    QVector deviceFilters(QVector& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter)
     {
-        QMap result;
+        QSet deviceIds;
         for (const auto& info : infos) {
-            result[info.deviceId] = false;
+            deviceIds.insert(info.deviceId);
         }
 
-        for (auto it = result.begin(), end = result.end(); it != end; it++) {
-            const auto storageInfo = getStorageInfo(it.key());
-            it.value() = isIgnored(storageInfo, accessFilter);
+        QVector result;
+        for (const quint32 id : deviceIds) {
+            const auto storageInfo = getStorageInfo(id);
+            if (isIgnored(storageInfo, accessFilter)) {
+                result.append(id);
+            }
         }
 
         return result;
     }
 
     bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter)
     {
-        const bool mounted = storageInfo.isValid();
+        const bool mounted = storageInfo.isValid() && !isObscured(storageInfo);
         if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) {
             return true;
         } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) {
             return true;
         }
 
-        if (storageInfo.fileSystemType() == QLatin1String("tmpfs")) {
-            // Due to the volatility of device ids, an id known by baloo may
-            // appear as mounted, but is not what baloo expects.
-            // For example at indexing time 43 was the id of a smb share, but
-            // at runtime 43 is the id of /run/media/ when other users are
-            // logged in. The latter have a type of 'tmpfs' and should be ignored.
-            return true;
-        }
-
         return false;
     }
 
-    void removeDocument(const quint64 id) {
-        m_transaction->removeDocument(id);
+    void removeRecursively(const quint64 id) {
+        m_transaction->removeRecursively(id);
     }
 
     void commit() {
@@ -260,21 +303,27 @@
     const QSharedPointer& urlFilter)
 {
     auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter);
+    auto& summary = listResult.second;
+    auto diff = m_pimpl->removeIgnored(listResult.first, accessFilter);
+    summary.ignored += diff.ignored;
+    summary.accessible -= diff.accessible;
     const auto sep = QLatin1Char(' ');
     QTextStream out(stdout);
     QTextStream err(stderr);
     for (const auto& info: listResult.first) {
-        out << QStringLiteral("%1").arg(info.accessible ? "+" : "!")
-        << sep << QStringLiteral("device: %1").arg(info.deviceId)
-        << sep << QStringLiteral("inode: %1").arg(info.inode)
-        << sep << QStringLiteral("url: %1").arg(info.url)
-        << endl;
+        // An entry only counts as accessible when the device it was indexed on is really mounted.
+        const bool accessible = info.accessible && m_pimpl->isMounted(info.deviceId);
+        out << QStringLiteral("%1").arg(accessible ? "+" : "!")
+            << sep << QStringLiteral("device: %1").arg(info.deviceId)
+            << sep << QStringLiteral("inode: %1").arg(info.inode)
+            << sep << QStringLiteral("url: %1").arg(info.url)
+            << endl;
     }
 
-    const auto& summary = listResult.second;
     if (accessFilter & IgnoreAvailable) {
-        err << i18n("Total: %1, Inaccessible: %2",
+        err << i18n("Total: %1, Ignored: %2, Inaccessible: %3",
             summary.total,
+            summary.ignored,
             summary.total - (summary.ignored + summary.accessible)) << endl;
     } else {
         err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4",
@@ -298,19 +347,21 @@
     const auto sep = QLatin1Char(' ');
     QTextStream out(stdout);
     QTextStream err(stderr);
-    int matchCount = 0;
+    DatabaseSanitizerImpl::Summary summary;
+    summary.total = useCount.size();
+
     for (auto it = useCount.cbegin(); it != useCount.cend(); it++) {
         auto id = it.key();
         auto info = m_pimpl->getStorageInfo(id);
-        auto mounted = info.isValid();
-        if (info.fileSystemType() == QLatin1String("tmpfs")) {
-            continue;
-        } else if (mounted && (accessFilter & IgnoreMounted)) {
-            continue;
-        } else if (!mounted && (accessFilter & IgnoreUnmounted)) {
+        if (m_pimpl->isIgnored(info, accessFilter)) {
+            summary.ignored++;
             continue;
         }
-        matchCount++;
+
+        auto mounted = info.isValid() && !m_pimpl->isObscured(info);
+        if (mounted) {
+            summary.accessible++;
+        }
         // TODO coloring would be nice, but "...|grep '^!'" does not work with it.
         // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!")
         // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263
@@ -322,8 +373,7 @@
             << sep << QStringLiteral("indexed-items:%1").arg(it.value());
 
         if (mounted) {
-            out
-                << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData())
+            out << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData())
                 << sep << QStringLiteral("device:%1").arg(info.device().constData())
                 << sep << QStringLiteral("path:%1").arg(info.rootPath())
                 ;
@@ -333,48 +383,63 @@
         out << endl;
     }
 
-    err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl;
+    err << i18n("Total: %1, Ignored: %2; Mounted: %3, Not mounted: %4",
+                summary.total,
+                summary.ignored,
+                summary.accessible,
+                summary.total - (summary.ignored + summary.accessible))
+        << endl;
 }
 
 void DatabaseSanitizer::removeStaleEntries(const QVector& deviceIds,
     const DatabaseSanitizer::ItemAccessFilters accessFilter,
     const bool dryRun,
     const QSharedPointer& urlFilter)
 {
-    auto listResult = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter);
+    // To capture files with wrong device ids, we must not filter by
+    // accessibility, because those are false positives.
+    auto listResult = m_pimpl->createList(deviceIds, IgnoreNone, urlFilter);
+    auto& summary = listResult.second;
 
-    const auto ignoredDevices = m_pimpl->deviceFilters(listResult.first, accessFilter);
+    // Same here: Filter only by mount state of drives
+    auto diff = m_pimpl->removeIgnored(listResult.first, accessFilter & DeviceMask);
+    summary.ignored += diff.ignored;
+    summary.accessible -= diff.accessible;
 
     const auto sep = QLatin1Char(' ');
-    auto& summary = listResult.second;
     QTextStream out(stdout);
     QTextStream err(stderr);
     for (const auto& info: listResult.first) {
-        if (ignoredDevices[info.deviceId] == true) {
+        if (info.accessible && m_pimpl->isMounted(info.deviceId)) {
             summary.ignored++;
-        } else {
-            if (info.isSymLink) {
-                out << i18n("IgnoredSymbolicLink:");
-                summary.ignored++;
-            } else {
-                m_pimpl->removeDocument(info.id);
-                out << i18n("Removing:");
-            }
-            out << sep << QStringLiteral("device: %1").arg(info.deviceId)
-                << sep << QStringLiteral("inode: %1").arg(info.inode)
-                << sep << QStringLiteral("url: %1").arg(info.url)
-                << endl;
+            summary.accessible--;
+            continue;
         }
+        if (info.isSymLink) {
+            // Ignore symlinks! It's impossible to obtain the device id of the target.
+            out << i18n("IgnoredSymbolicLink:");
+            summary.ignored++;
+            summary.accessible--;
+            continue;
+        }
+
+        m_pimpl->removeRecursively(info.id);
+        out << i18n("Removing:");
+        out << sep << QStringLiteral("device: %1").arg(info.deviceId)
+            << sep << QStringLiteral("inode: %1").arg(info.inode)
+            << sep << QStringLiteral("url: %1").arg(info.url)
+            << endl;
     }
     if (dryRun) {
         m_pimpl->abort();
     } else {
         m_pimpl->commit();
     }
-    Q_ASSERT(summary.accessible == 0);
-    err << i18nc("numbers", "Removed: %1, Total: %2, Ignored: %3",
+    err << i18nc("numbers", "Removed: %1 (Invalid device ids: %2), Total: %3, Ignored: %4",
         summary.total - summary.ignored,
+        summary.accessible,
         summary.total,
-        summary.ignored)
+        summary.ignored
+        )
         << endl;
 }
diff --git a/src/tools/experimental/baloodb/main.cpp b/src/tools/experimental/baloodb/main.cpp
--- a/src/tools/experimental/baloodb/main.cpp
+++ b/src/tools/experimental/baloodb/main.cpp
@@ -77,14 +77,18 @@
             },
             QStringList{
                 QStringLiteral("missing-only"),
+                QStringLiteral("mounted-only"),
                 QStringLiteral("device-id")
             }
         },
         Command{
             QStringLiteral("devices"),
             i18n("List devices"),
             QStringList{},
-            QStringList{QStringLiteral("missing-only")}
+            QStringList{
+                QStringLiteral("missing-only"),
+                QStringLiteral("mounted-only")
+            }
         },
         /*TODO:
         Command{
@@ -133,7 +137,7 @@
         auto options = getOptions(c.name);
         const QString optionStr = options.isEmpty()
             ? QString()
-            : QStringLiteral(" [--%1]").arg(options.join(QLatin1Literal("] [--")));
+            : QStringLiteral(" [--%1]").arg(options.join(QLatin1Literal(", --")));
 
         QString argumentStr;
         if (!c.args.isEmpty() ) {
@@ -146,7 +150,7 @@
                 .arg(argumentStr);
 
         const QString str = QStringLiteral("%1 %2")
-            .arg(commandStr, -58)
+            .arg(commandStr, -62)
             .arg(c.description);
 
         allowedcommands.append(str);