diff --git a/src/engine/experimental/databasesanitizer.cpp b/src/engine/experimental/databasesanitizer.cpp index 4c7dbc42..5af5993f 100644 --- a/src/engine/experimental/databasesanitizer.cpp +++ b/src/engine/experimental/databasesanitizer.cpp @@ -1,252 +1,281 @@ /* * This file is part of the KDE Baloo project. * Copyright 2018 Michael Heidelbach * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License or (at your option) version 3 or any later version * accepted by the membership of KDE e.V. (or its successor approved * by the membership of KDE e.V.), which shall act as a proxy * defined in Section 14 of version 3 of the license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
*/ #include "databasesanitizer.h" #include "documenturldb.h" #include "idutils.h" #include #include #include #include #include namespace Baloo { /* Private implementation behind DatabaseSanitizer: holds the transaction and collects and filters database items. */ class DatabaseSanitizerImpl { public: /* NOTE(review): the Transaction is allocated with new and no matching delete is visible in this class -- confirm who releases it. */ DatabaseSanitizerImpl(const Database& db, Transaction::TransactionType type) : m_transaction(new Transaction(db, type)) { } public: /** * \brief Basic info about database items */ struct FileInfo { quint32 deviceId = 0; quint32 inode = 0; QString url = QString(); bool accessible = true; }; /* Writes the percentage of \p cur relative to \p max to \p out whenever \p cur is a multiple of \p step; \p cur is incremented on every call. */ void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const { if (cur % step == 0) { out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16); out.flush(); } cur++; } + /** + * Summary of createList() actions + */ + struct Summary { + quint64 total = 0; ///Count of all files + quint64 ignored = 0; ///Count of filtered out files + quint64 accessible = 0; ///Count of checked and accessible files + }; /** * Create a list of \a FileInfo items. * * \p deviceIds filter by device ids. If the vector is empty no filtering is done * and every item is collected. * Positive numbers are including filters collecting only the mentioned device ids. * Negative numbers are excluding filters collecting everything but the mentioned device ids. * - * \p missingOnly Only inaccessible items are collected. + * \p accessFilter Flags to filter items by accessibility. * * \p urlFilter Filter result urls. Default is null = Collect everything. * * \return The collected items paired with a Summary of the total, ignored and accessible counts. 
*/ - QVector createList( + QPair, Summary> createList( const QVector& deviceIds, - const bool purging, + const DatabaseSanitizer::ItemAccessFilters accessFilter, const QSharedPointer& urlFilter ) const { Q_ASSERT(m_transaction); const auto docUrlDb = DocumentUrlDB(m_transaction->m_dbis.idTreeDbi, m_transaction->m_dbis.idFilenameDbi, m_transaction->m_txn); const auto map = docUrlDb.toTestMap(); const auto keys = map.keys(); /* NOTE(review): after this change keys is no longer read anywhere in this function -- candidate for removal. */ QVector result; - result.reserve(keys.count()); + uint max = map.size(); uint i = 0; - uint max = keys.count(); + result.reserve(max); QVector includeIds; QVector excludeIds; for (qint64 deviceId : deviceIds) { if (deviceId > 0) { includeIds.append(deviceId); } else if (deviceId < 0) { excludeIds.append(-deviceId); } } - /* Every entry starts out counted as ignored; the count is decremented for each item that is kept. */ + Summary summary; + summary.total = max; + summary.ignored = max; QTextStream err(stderr); - for (quint64 id: keys) { + + for (auto it = map.constBegin(), end = map.constEnd(); it != end; it++) { printProgress(err, i, max, 100); + const quint64 id = it.key(); + const quint32 deviceId = idToDeviceId(id); /* Device id and url filters run first, so QFileInfo::exists() below is only called for entries that pass them. */ + if (!includeIds.isEmpty() && !includeIds.contains(deviceId)) { + continue; + } else if (excludeIds.contains(deviceId)) { + continue; /* NOTE(review): the pattern is matched against it.value() as stored, while info.url below goes through QFile::decodeName() -- confirm both see the same text. */ + } else if (urlFilter && !urlFilter->match(it.value()).hasMatch()) { + continue; + } - const quint32* arr = reinterpret_cast(&id); - const auto url = docUrlDb.get(id); FileInfo info; - info.deviceId = arr[0]; - info.inode = arr[1]; - info.url = url; - info.accessible = !url.isEmpty() && QFileInfo::exists(url); - if ((!includeIds.isEmpty() && !includeIds.contains(info.deviceId)) - || (!excludeIds.isEmpty() && excludeIds.contains(info.deviceId)) - || (purging && info.accessible) - || (urlFilter && !urlFilter->match(info.url).hasMatch()) - ) { + info.deviceId = deviceId; + info.inode = idToInode(id); + info.url = QFile::decodeName(it.value()); + info.accessible = !info.url.isEmpty() && QFileInfo::exists(info.url); + + if (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) { + 
continue; + } else if (!info.accessible && (accessFilter & DatabaseSanitizer::IgnoreUnavailable)) { continue; } + result.append(info); + summary.ignored--; + if (info.accessible) { + summary.accessible++; + } } - return result; + return {result, summary}; } /* Returns the mounted volume whose root path has device id \p id. The id-to-volume table is built once, on the first call, from QStorageInfo::mountedVolumes(); an id without a mounted volume yields a default-constructed QStorageInfo. */ QStorageInfo getStorageInfo(const quint32 id) { static QMap storageInfos = []() { QMap result; const auto volumes = QStorageInfo::mountedVolumes(); for (const auto& vol : volumes) { const QByteArray rootPath = QFile::encodeName(vol.rootPath()); const auto fsinfo = filePathToStat(rootPath); const quint32 id = static_cast(fsinfo.st_dev); // qDebug() << vol; result[id] = vol; } return result; }(); QStorageInfo info = storageInfos.value(id); return info; } private: Transaction* m_transaction; }; } using namespace Baloo; DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type) : m_pimpl(new DatabaseSanitizerImpl(db, type)) { } DatabaseSanitizer::DatabaseSanitizer(Database* db, Transaction::TransactionType type) : DatabaseSanitizer(*db, type) { } DatabaseSanitizer::~DatabaseSanitizer() { delete m_pimpl; m_pimpl = nullptr; } /** * Create a list of \a FileInfo items and print it to stdout. * A summary of the item counts is written to stderr. * * \p deviceIds filter by device ids. If the vector is empty no filtering is done * and everything is printed. * Positive numbers are including filters printing only the mentioned device ids. * Negative numbers are excluding filters printing everything but the mentioned device ids. * * \p accessFilter Flags to filter items by accessibility. * * \p urlFilter Filter result urls. Default is null = Print everything. 
*/ void DatabaseSanitizer::printList( const QVector& deviceIds, - const bool missingOnly, + const ItemAccessFilters accessFilter, const QSharedPointer& urlFilter) { - auto infos = m_pimpl->createList(deviceIds, missingOnly, urlFilter); + auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter); const auto sep = QLatin1Char(' '); QTextStream out(stdout); QTextStream err(stderr); - for (const auto& info: infos) { - if (!missingOnly) { - out << QStringLiteral("%1").arg(info.accessible ? "+" : "!") << sep; - } else if (!info.accessible) { - out << i18n("Missing:") << sep; - } else { - Q_ASSERT(false); - continue; - } - out << QStringLiteral("device: %1").arg(info.deviceId) /* Each output line starts with "+" for an accessible item and "!" for an inaccessible one. */ + for (const auto& info: listResult.first) { + out << QStringLiteral("%1").arg(info.accessible ? "+" : "!") + << sep << QStringLiteral("device: %1").arg(info.deviceId) << sep << QStringLiteral("inode: %1").arg(info.inode) << sep << QStringLiteral("url: %1").arg(info.url) << endl; } - err << i18n("Found %1 matching items", infos.count()) << endl; /* The inaccessible count is derived: kept items (total - ignored) minus the accessible ones. */ + + const auto& summary = listResult.second; + if (accessFilter & IgnoreAvailable) { + err << i18n("Total: %1, Inaccessible: %2", + summary.total, + summary.total - (summary.ignored + summary.accessible)) << endl; + } else { + err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4", + summary.total, + summary.ignored, + summary.accessible, + summary.total - (summary.ignored + summary.accessible)) << endl; + } } /* Prints one line per device id referenced by the collected items, together with its indexed-item count and, when mounted, filesystem details. */ -void DatabaseSanitizer::printDevices(const QVector& deviceIds, const bool missingOnly) +void DatabaseSanitizer::printDevices(const QVector& deviceIds, const ItemAccessFilters accessFilter) { - auto infos = m_pimpl->createList(deviceIds, false, nullptr); + auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr); /* Number of collected items per device id. */ QMap useCount; - for (const auto& info : infos) { + for (const auto& info : infos.first) { useCount[info.deviceId]++; } const auto sep = QLatin1Char(' '); QTextStream out(stdout); QTextStream 
err(stderr); int matchCount = 0; - for (auto it = useCount.cbegin(); it != useCount.cend(); it++) { auto id = it.key(); auto info = m_pimpl->getStorageInfo(id); auto mounted = info.isValid(); /* A device is treated as mounted when getStorageInfo() returned a valid volume for its id. */ - - if (missingOnly && mounted) { /* tmpfs devices are skipped regardless of the filter flags. */ + if (info.fileSystemType() == QLatin1String("tmpfs")) { + continue; + } else if (mounted && (accessFilter & IgnoreMounted)) { + continue; + } else if (!mounted && (accessFilter & IgnoreUnmounted)) { continue; } matchCount++; // TODO coloring would be nice, but "...|grep '^!'" does not work with it. // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!") // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263 out << QStringLiteral("%1").arg(mounted ? "+" : "!") << sep << QStringLiteral("device:%1").arg(id) << sep << QStringLiteral("[%1:%2]") .arg(major(id), 4, 16, QLatin1Char('0')) .arg(minor(id), 4, 16, QLatin1Char('0')) << sep << QStringLiteral("indexed-items:%1").arg(it.value()); if (mounted) { out << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData()) << sep << QStringLiteral("device:%1").arg(info.device().constData()) << sep << QStringLiteral("path:%1").arg(info.rootPath()) ; } // TODO: see above // out << QStringLiteral("\033[0m") << endl; out << endl; } err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl; } diff --git a/src/engine/experimental/databasesanitizer.h b/src/engine/experimental/databasesanitizer.h index 612018cd..7fe05740 100644 --- a/src/engine/experimental/databasesanitizer.h +++ b/src/engine/experimental/databasesanitizer.h @@ -1,71 +1,84 @@ /* * This file is part of the KDE Baloo project. 
* Copyright 2018 Michael Heidelbach * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License or (at your option) version 3 or any later version * accepted by the membership of KDE e.V. (or its successor approved * by the membership of KDE e.V.), which shall act as a proxy * defined in Section 14 of version 3 of the license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef BALOODATABASESANITIZER_H #define BALOODATABASESANITIZER_H #include "transaction.h" namespace Baloo { class DatabaseSanitizerImpl; /** * \brief Provide methods to show database problems and * sanitize them. */ class BALOO_ENGINE_EXPORT DatabaseSanitizer { +public: /* Flags selecting what is skipped: IgnoreAvailable / IgnoreUnavailable skip accessible / inaccessible files, IgnoreMounted / IgnoreUnmounted skip devices that are / are not currently mounted. */ + enum ItemAccessFilterFlags { + IgnoreNone = 0, + IgnoreAvailable = 1, + IgnoreUnavailable = 2, + IgnoreMounted = 8, + IgnoreUnmounted = 0x10, + }; + Q_DECLARE_FLAGS(ItemAccessFilters, ItemAccessFilterFlags) + public: DatabaseSanitizer(const Database& db, Transaction::TransactionType type); DatabaseSanitizer(Database* db, Transaction::TransactionType type); ~DatabaseSanitizer(); /** * Print database content to stdout * * \p deviceIds filter by device ids. Negative numbers list everything but... * with empty \p deviceIds everything is printed. * * \p accessFilter Flags to filter items by accessibility. Default is IgnoreNone. * * \p urlFilter Filter result urls. Default is null = Print everything. 
*/ void printList(const QVector& deviceIds, - const bool missingOnly, - const QSharedPointer& urlFilter + const ItemAccessFilters accessFilter = IgnoreNone, + const QSharedPointer& urlFilter = nullptr ); /** * Print info about known devices to stdout * * \p deviceIds filter by device ids. Negative numbers list everything but... * with empty \p deviceIds everything is printed. * - * \p missingOnly Only inaccessible items are printed. + * \p accessFilter filter by accessibility. E.g IgnoreAvailable|IgnoreUnmounted + * prints only mounted devices with inaccessible files. */ - void printDevices(const QVector& deviceIds, const bool missingOnly = false); + void printDevices(const QVector& deviceIds, const ItemAccessFilters accessFilter = IgnoreNone); /* Copying is disabled; m_pimpl points to the private implementation object. */ private: DatabaseSanitizer(const DatabaseSanitizer& rhs) = delete; DatabaseSanitizerImpl* m_pimpl; }; /* Provides operator| for ItemAccessFilterFlags values so they combine into ItemAccessFilters. */ +Q_DECLARE_OPERATORS_FOR_FLAGS(DatabaseSanitizer::ItemAccessFilters) + } #endif // BALOODATABASESANITIZER_H diff --git a/src/tools/experimental/baloodb/main.cpp b/src/tools/experimental/baloodb/main.cpp index d59db60b..fe5cc86c 100644 --- a/src/tools/experimental/baloodb/main.cpp +++ b/src/tools/experimental/baloodb/main.cpp @@ -1,265 +1,277 @@ /* * * Copyright 2018 Michael Heidelbach * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License or (at your option) version 3 or any later version * accepted by the membership of KDE e.V. (or its successor approved * by the membership of KDE e.V.), which shall act as a proxy * defined in Section 14 of version 3 of the license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "global.h" #include "experimental/databasesanitizer.h" #include #include #include #include #include #include #include #include #include using namespace Baloo; /* Describes one sub-command: its name, help text, positional argument names and the option names it accepts. */ struct Command { const QString name; const QString description; const QStringList args; const QStringList options; }; /* All command line options registered with the parser; which command lists which option is recorded in commands. */ const auto options = QList{ QCommandLineOption{ QStringList{QStringLiteral("i"), QStringLiteral("device-id")}, i18n("Filter by device id." "\n0 (default) does not filter and everything is printed." "\nPositive numbers are including filters printing only the mentioned device id." "\nNegative numbers are excluding filters printing everything but the mentioned device id." "\nMay be given multiple times."), i18n("integer"), 0 }, QCommandLineOption{ QStringList{QStringLiteral("D"), QStringLiteral("dry-run")}, i18n("Print results of a prune operation, but do not change anything." "\nOnly applies to \"%1\" command", QStringLiteral("prune")) }, QCommandLineOption{ QStringList{QStringLiteral("m"), QStringLiteral("missing-only")}, i18n("List only inaccessible entries.\nOnly applies to \"%1\"", QStringLiteral("list")) + }, + QCommandLineOption{ + QStringList{QStringLiteral("u"), QStringLiteral("mounted-only")}, + i18n("Act only on item on mounted devices") } }; /* Sub-commands currently offered, with their positional arguments and accepted option names. */ const auto commands = std::vector{ Command{ QStringLiteral("list"), i18n("List database contents. Use a regular expression as argument to filter output"), QStringList{ QStringLiteral("pattern") }, QStringList{ QStringLiteral("missing-only"), QStringLiteral("device-id") } }, /*TODO: Command{ QStringLiteral("check"), i18n("Check database contents. 
" "Beware this may take very long to execute"), QStringList{}, QStringList{} }, */ /*TODO: Command{ QStringLiteral("prune"), i18n("Remove stale database entries"), QStringList{ QStringLiteral("pattern") }, QStringList{ QStringLiteral("dry-run"), QStringLiteral("device-id") } }, */ Command{ QStringLiteral("devices"), i18n("List devices"), QStringList{}, - QStringList{QStringLiteral("missing-only")} + QStringList{QStringLiteral("missing-only"), QStringLiteral("mounted-only")} } }; /* Returns the names of all entries in commands. */ const QStringList allowedCommands() { QStringList names; for (const auto& c : commands) { names.append(c.name); } return names; } /* Returns the option names listed for the command called \p name, or an empty list when no such command exists. */ const QStringList getOptions(const QString& name) { for (const auto& c : commands) { if (c.name == name) { return c.options; } } return QStringList(); } /* Builds the "Commands:" section of the help text: one entry per command showing its options, arguments and description. */ QString createDescription() { QStringList allowedcommands; for (const auto& c: commands) { auto options = getOptions(c.name); const QString optionStr = options.isEmpty() ? QString() : QStringLiteral(" [--%1]").arg(options.join(QLatin1Literal("] [--"))); QString argumentStr; if (!c.args.isEmpty() ) { argumentStr = QStringLiteral(" [%1]").arg(c.args.join(QStringLiteral("] ["))); } const QString commandStr = QStringLiteral("%1%2%3") .arg(c.name) .arg(optionStr) .arg(argumentStr); const QString str = QStringLiteral("%1 %2") .arg(commandStr, -48) .arg(c.description); allowedcommands.append(str); } const QString allCommandsStr = allowedcommands.join(QStringLiteral("\n ")); return i18n("\n\nCommands:\n %1", allCommandsStr); } /* Entry point: parses the command line, opens the index read-only and runs the "list" or "devices" command. */ int main(int argc, char* argv[]) { QCoreApplication app(argc, argv); KAboutData aboutData(QStringLiteral("baloodb"), i18n("Baloo Database Sanitizer"), PROJECT_VERSION, i18n("The Baloo Database Lister & Sanitizer"), KAboutLicense::GPL, i18n("(c) 2018, Michael Heidelbach")); aboutData.addAuthor(i18n("Michael Heidelbach"), i18n("Maintainer"), QStringLiteral("ottwolt@gmail.com")); KAboutData::setApplicationData(aboutData); QCommandLineParser parser; parser.addOptions(options); 
parser.addPositionalArgument(QStringLiteral("command"), i18n("The command to execute"), allowedCommands().join(QStringLiteral("|")) ); parser.addPositionalArgument(QStringLiteral("pattern"), i18nc("Command", "A regular expression applied to the URL of database items" "\nExample: %1" , "baloodb list '^/media/videos/series'" ) ); const QString warnExperiment = QStringLiteral( "===\nPlease note: This is an experimental tool. Command line switches or their meaning may change.\n==="); parser.setApplicationDescription(warnExperiment + createDescription()); parser.addVersionOption(); parser.addHelpOption(); parser.process(app); if (parser.positionalArguments().isEmpty()) { qDebug() << "No command"; parser.showHelp(1); } auto args = parser.positionalArguments(); auto command = args.at(0); args.removeFirst(); if(!allowedCommands().contains(command)) { qDebug() << "Unknown command" << command; parser.showHelp(1); } /* NOTE(review): optNames and allowedOptions are not read anywhere further down in main(). */ const auto optNames = parser.optionNames(); const auto allowedOptions = getOptions(command); QVector deviceIds; /* NOTE(review): toInt() is used for values that createList() later treats as qint64; text outside the int range presumably becomes 0 and is then ignored by the filter -- confirm. */ for (const auto& dev : parser.values(QStringLiteral("device-id"))) { deviceIds.append(dev.toInt()); } - const bool missingOnly = parser.isSet(QStringLiteral("missing-only")); /* Translate the switches into filter flags: --missing-only sets IgnoreAvailable, --mounted-only sets IgnoreUnmounted. */ + const DatabaseSanitizer::ItemAccessFilters accessFilter = ( + parser.isSet(QStringLiteral("missing-only")) + ? DatabaseSanitizer::IgnoreAvailable + : DatabaseSanitizer::IgnoreNone + ) | ( + parser.isSet(QStringLiteral("mounted-only")) + ? DatabaseSanitizer::IgnoreUnmounted + : DatabaseSanitizer::IgnoreNone + ); /* The first argument after the command, if any, is used as the url filter pattern. */ const QString pattern = args.isEmpty() ? QString() : args.at(0); const QSharedPointer urlFilter(pattern.isEmpty() ? 
nullptr : new QRegularExpression{pattern}); auto db = globalDatabaseInstance(); QTextStream err(stderr); QElapsedTimer timer; timer.start(); if (command == QStringLiteral("list")) { if (!db->open(Database::ReadOnlyDatabase)) { err << i18n("Baloo Index could not be opened") << endl; return 1; } DatabaseSanitizer san(db, Transaction::ReadOnly); err << i18n("Listing database contents...") << endl; - san.printList(deviceIds, missingOnly, urlFilter); + san.printList(deviceIds, accessFilter, urlFilter); } else if (command == QStringLiteral("devices")) { if (!db->open(Database::ReadOnlyDatabase)) { err << i18n("Baloo Index could not be opened") << endl; return 1; } DatabaseSanitizer san(db, Transaction::ReadOnly); err << i18n("Listing database contents...") << endl; - san.printDevices(deviceIds, missingOnly); + san.printDevices(deviceIds, accessFilter); /* NOTE(review): "clean" and "check" are not entries of commands, so the unknown-command check earlier in main() presumably exits before these branches can run; the commented-out command is also named "prune", not "clean" -- confirm the intended name. */ } else if (command == QStringLiteral("clean")) { /* TODO: add prune command */ parser.showHelp(1); } else if (command == QStringLiteral("check")) { parser.showHelp(1); /* TODO: After check methods are improved Database *db = globalDatabaseInstance(); if (!db->open(Database::ReadOnlyDatabase)) { err << i18n("Baloo Index could not be opened") << endl; return 1; } Transaction tr(db, Transaction::ReadOnly); err << i18n("Checking file paths ... ") << endl; tr.checkFsTree(); err << i18n("Checking postings ... ") << endl; tr.checkTermsDbinPostingDb(); err << i18n("Checking terms ... ") << endl; tr.checkPostingDbinTermsDb(); */ } err << i18n("Elapsed: %1 secs", timer.nsecsElapsed() / 1000000000.0) << endl; return 0; }