Changeset View
Changeset View
Standalone View
Standalone View
src/engine/experimental/databasesanitizer.cpp
Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Line(s) | 43 | public: | |||
---|---|---|---|---|---|
152 | } | 152 | } | ||
153 | 153 | | |||
154 | QStorageInfo getStorageInfo(const quint32 id) { | 154 | QStorageInfo getStorageInfo(const quint32 id) { | ||
155 | static QMap<quint32, QStorageInfo> storageInfos = []() { | 155 | static QMap<quint32, QStorageInfo> storageInfos = []() { | ||
156 | QMap<quint32, QStorageInfo> result; | 156 | QMap<quint32, QStorageInfo> result; | ||
157 | const auto volumes = QStorageInfo::mountedVolumes(); | 157 | const auto volumes = QStorageInfo::mountedVolumes(); | ||
158 | for (const auto& vol : volumes) { | 158 | for (const auto& vol : volumes) { | ||
159 | const QByteArray rootPath = QFile::encodeName(vol.rootPath()); | 159 | const QByteArray rootPath = QFile::encodeName(vol.rootPath()); | ||
160 | const auto fsinfo = filePathToStat(rootPath); | 160 | const auto fsinfo = filePathToStat(rootPath); | ||
michaelh: ```
$ baloodb list --mounted-only Einstein
Listing database contents...
! device: 43 inode: 319… | |||||
michaelh: On the wrong track
| |||||
161 | const quint32 id = static_cast<quint32>(fsinfo.st_dev); | 161 | const quint32 id = static_cast<quint32>(fsinfo.st_dev); | ||
162 | // qDebug() << vol; | 162 | // qDebug() << vol; | ||
163 | result[id] = vol; | 163 | result[id] = vol; | ||
164 | } | 164 | } | ||
165 | return result; | 165 | return result; | ||
166 | }(); | 166 | }(); | ||
167 | 167 | | |||
168 | QStorageInfo info = storageInfos.value(id); | 168 | QStorageInfo info = storageInfos.value(id); | ||
169 | return info; | 169 | return info; | ||
170 | } | 170 | } | ||
171 | 171 | | |||
172 | /** | ||||
173 | * \return True if device is mounted and not obscured | ||||
174 | */ | ||||
175 | bool isMounted(const quint32 id) { | ||||
176 | const QStorageInfo& info = getStorageInfo(id); | ||||
177 | return info.isValid() && !isObscured(info); | ||||
178 | } | ||||
179 | | ||||
bruns: missing space | |||||
bruns: `isMounted(id)` can be implemented as `getStorageInfo(id).isValid()` | |||||
180 | /** | ||||
michaelh: How can this work? `quint64 + bool` | |||||
bruns: bool is automatically cast to int, false -> 0, true -> 1.
Although probably better to do it… | |||||
181 | * Due to the volatility of device ids, an id known by baloo may | ||||
182 | * appear as mounted, but is not what baloo expects. | ||||
183 | * For example at indexing time 43 was the id of a smb share, but | ||||
bruns: unused, please remove | |||||
184 | * at runtime 43 is the id of /run/media/<uid> when other users are | ||||
185 | * logged in. The latter have a type of 'tmpfs' and should be ignored. | ||||
186 | * \return Returns true when the device should be ignored | ||||
187 | */ | ||||
188 | bool isObscured(const QStorageInfo& info) | ||||
189 | { | ||||
190 | return info.fileSystemType() == QLatin1String("tmpfs"); | ||||
191 | } | ||||
172 | 192 | | |||
173 | QMap<quint32, bool> deviceFilters(QVector<FileInfo>& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter) | 193 | Summary removeIgnored(QVector<FileInfo>& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter) | ||
174 | { | 194 | { | ||
175 | QMap<quint32, bool> result; | 195 | const auto ignoredDevices = deviceFilters(infos, accessFilter); | ||
196 | auto tail = infos.end(); | ||||
197 | Summary summary; | ||||
198 | for (const quint32 deviceId : ignoredDevices) { | ||||
199 | tail = std::remove_if(infos.begin(), tail, | ||||
200 | [deviceId, &summary] (const FileInfo& info) { | ||||
201 | if (deviceId != info.deviceId) { | ||||
202 | return false; | ||||
203 | } | ||||
204 | summary.accessible += info.accessible ? 1 : 0; | ||||
205 | return true; | ||||
206 | }); | ||||
207 | } | ||||
208 | summary.ignored = infos.end() - tail; | ||||
209 | infos.erase(tail, infos.end()); | ||||
210 | return summary; | ||||
211 | } | ||||
212 | | ||||
213 | QVector<quint32> deviceFilters(QVector<FileInfo>& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter) | ||||
214 | { | ||||
215 | QSet<quint32> deviceIds; | ||||
bruns: I cannot see what this function is useful for. Just inline the FileInfo -> DeviceIds reduction here, and ... | |||||
176 | for (const auto& info : infos) { | 216 | for (const auto& info : infos) { | ||
177 | result[info.deviceId] = false; | 217 | deviceIds.insert(info.deviceId); | ||
178 | } | 218 | } | ||
bruns: ... skip the deviceId if it should be ignored:
```
const auto storageInfo = getStorageInfo(id);
if (isDeviceIgnored(storageInfo, accessFilter)) {
    continue;
}
```
179 | 219 | | |||
180 | for (auto it = result.begin(), end = result.end(); it != end; it++) { | 220 | QVector<quint32> result; | ||
181 | const auto storageInfo = getStorageInfo(it.key()); | 221 | for (const quint32 id : deviceIds) { | ||
bruns:
```
if (info.deviceId != deviceId) {
    return false;
}
summary.accessible += info.accessible ? 1 : 0;
return true;
```
182 | it.value() = isIgnored(storageInfo, accessFilter); | 222 | const auto storageInfo = getStorageInfo(id); | ||
michaelh: ~3 hrs for this, phew. | |||||
223 | if (isIgnored(storageInfo, accessFilter)) { | ||||
224 | result.append(id); | ||||
225 | } | ||||
183 | } | 226 | } | ||
184 | return result; | 227 | return result; | ||
185 | } | 228 | } | ||
186 | 229 | | |||
187 | bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter) | 230 | bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter) | ||
188 | { | 231 | { | ||
189 | const bool mounted = storageInfo.isValid(); | 232 | const bool mounted = storageInfo.isValid() && !isObscured(storageInfo); | ||
190 | if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) { | 233 | if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) { | ||
191 | return true; | 234 | return true; | ||
192 | } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) { | 235 | } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) { | ||
193 | return true; | 236 | return true; | ||
194 | } | 237 | } | ||
195 | 238 | | |||
196 | if (storageInfo.fileSystemType() == QLatin1String("tmpfs")) { | | |||
197 | // Due to the volatility of device ids, an id known by baloo may | | |||
198 | // appear as mounted, but is not what baloo expects. | | |||
199 | // For example at indexing time 43 was the id of a smb share, but | | |||
200 | // at runtime 43 is the id of /run/media/<uid> when other users are | | |||
201 | // logged in. The latter have a type of 'tmpfs' and should be ignored. | | |||
202 | return true; | | |||
203 | } | | |||
204 | | ||||
205 | return false; | 239 | return false; | ||
206 | } | 240 | } | ||
207 | 241 | | |||
208 | void removeDocument(const quint64 id) { | 242 | void removeRecursively(const quint64 id) { | ||
209 | m_transaction->removeDocument(id); | 243 | m_transaction->removeRecursively(id); | ||
210 | } | 244 | } | ||
211 | 245 | | |||
212 | void commit() { | 246 | void commit() { | ||
213 | m_transaction->commit(); | 247 | m_transaction->commit(); | ||
214 | } | 248 | } | ||
215 | 249 | | |||
216 | void abort() { | 250 | void abort() { | ||
217 | m_transaction->abort(); | 251 | m_transaction->abort(); | ||
Show All 37 Lines | |||||
255 | * \p urlFilter Filter result urls. Default is null = Print everything. | 289 | * \p urlFilter Filter result urls. Default is null = Print everything. | ||
256 | */ | 290 | */ | ||
257 | void DatabaseSanitizer::printList( | 291 | void DatabaseSanitizer::printList( | ||
258 | const QVector<qint64>& deviceIds, | 292 | const QVector<qint64>& deviceIds, | ||
259 | const ItemAccessFilters accessFilter, | 293 | const ItemAccessFilters accessFilter, | ||
260 | const QSharedPointer<QRegularExpression>& urlFilter) | 294 | const QSharedPointer<QRegularExpression>& urlFilter) | ||
261 | { | 295 | { | ||
262 | auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter); | 296 | auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter); | ||
297 | auto& summary = listResult.second; | ||||
298 | auto diff = m_pimpl->removeIgnored(listResult.first, accessFilter); | ||||
299 | summary.ignored += diff.ignored; | ||||
300 | summary.accessible -= diff.accessible; | ||||
263 | const auto sep = QLatin1Char(' '); | 301 | const auto sep = QLatin1Char(' '); | ||
264 | QTextStream out(stdout); | 302 | QTextStream out(stdout); | ||
265 | QTextStream err(stderr); | 303 | QTextStream err(stderr); | ||
266 | for (const auto& info: listResult.first) { | 304 | for (const auto& info: listResult.first) { | ||
267 | out << QStringLiteral("%1").arg(info.accessible ? "+" : "!") | 305 | const bool accessible = info.accessible && !m_pimpl->isMounted(info.deviceId); | ||
bruns: If ignoredDevices is a Set/List, you can do a filter pass over the fileList first.
```
auto& fileList = listResult.first;
auto tail = fileList.end();
for (auto deviceId : ignoredDevices) {
    tail = std::remove_if(fileList.begin(), tail,
        [deviceId] (const FileInfo& info) { return info.id == deviceId; });
}
summary.ignored += fileList.end() - tail;
std::erase(tail, fileList.end());
```
michaelh: Cool! | |||||
306 | out << QStringLiteral("%1").arg(accessible ? "+" : "!") | ||||
268 | << sep << QStringLiteral("device: %1").arg(info.deviceId) | 307 | << sep << QStringLiteral("device: %1").arg(info.deviceId) | ||
269 | << sep << QStringLiteral("inode: %1").arg(info.inode) | 308 | << sep << QStringLiteral("inode: %1").arg(info.inode) | ||
270 | << sep << QStringLiteral("url: %1").arg(info.url) | 309 | << sep << QStringLiteral("url: %1").arg(info.url) | ||
271 | << endl; | 310 | << endl; | ||
272 | } | 311 | } | ||
273 | 312 | | |||
274 | const auto& summary = listResult.second; | | |||
275 | if (accessFilter & IgnoreAvailable) { | 313 | if (accessFilter & IgnoreAvailable) { | ||
276 | err << i18n("Total: %1, Inaccessible: %2", | 314 | err << i18n("Total: %1, Ignored: %2, Inaccessible: %3", | ||
277 | summary.total, | 315 | summary.total, | ||
316 | summary.ignored, | ||||
278 | summary.total - (summary.ignored + summary.accessible)) << endl; | 317 | summary.total - (summary.ignored + summary.accessible)) << endl; | ||
279 | } else { | 318 | } else { | ||
280 | err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4", | 319 | err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4", | ||
281 | summary.total, | 320 | summary.total, | ||
282 | summary.ignored, | 321 | summary.ignored, | ||
283 | summary.accessible, | 322 | summary.accessible, | ||
284 | summary.total - (summary.ignored + summary.accessible)) << endl; | 323 | summary.total - (summary.ignored + summary.accessible)) << endl; | ||
285 | } | 324 | } | ||
286 | 325 | | |||
287 | } | 326 | } | ||
288 | 327 | | |||
289 | void DatabaseSanitizer::printDevices(const QVector<qint64>& deviceIds, const ItemAccessFilters accessFilter) | 328 | void DatabaseSanitizer::printDevices(const QVector<qint64>& deviceIds, const ItemAccessFilters accessFilter) | ||
290 | { | 329 | { | ||
291 | auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr); | 330 | auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr); | ||
292 | 331 | | |||
293 | QMap<quint32, quint64> useCount; | 332 | QMap<quint32, quint64> useCount; | ||
294 | for (const auto& info : infos.first) { | 333 | for (const auto& info : infos.first) { | ||
295 | useCount[info.deviceId]++; | 334 | useCount[info.deviceId]++; | ||
296 | } | 335 | } | ||
297 | 336 | | |||
298 | const auto sep = QLatin1Char(' '); | 337 | const auto sep = QLatin1Char(' '); | ||
299 | QTextStream out(stdout); | 338 | QTextStream out(stdout); | ||
300 | QTextStream err(stderr); | 339 | QTextStream err(stderr); | ||
301 | int matchCount = 0; | 340 | DatabaseSanitizerImpl::Summary summary; | ||
341 | summary.total = useCount.size(); | ||||
342 | | ||||
302 | for (auto it = useCount.cbegin(); it != useCount.cend(); it++) { | 343 | for (auto it = useCount.cbegin(); it != useCount.cend(); it++) { | ||
303 | auto id = it.key(); | 344 | auto id = it.key(); | ||
304 | auto info = m_pimpl->getStorageInfo(id); | 345 | auto info = m_pimpl->getStorageInfo(id); | ||
305 | auto mounted = info.isValid(); | 346 | if (m_pimpl->isIgnored(info, accessFilter)) { | ||
306 | if (info.fileSystemType() == QLatin1String("tmpfs")) { | 347 | summary.ignored++; | ||
307 | continue; | | |||
308 | } else if (mounted && (accessFilter & IgnoreMounted)) { | | |||
309 | continue; | | |||
310 | } else if (!mounted && (accessFilter & IgnoreUnmounted)) { | | |||
311 | continue; | 348 | continue; | ||
312 | } | 349 | } | ||
313 | matchCount++; | 350 | | ||
351 | auto mounted = info.isValid() && !m_pimpl->isObscured(info); | ||||
352 | if (mounted) { | ||||
353 | summary.accessible++; | ||||
354 | } | ||||
314 | // TODO coloring would be nice, but "...|grep '^!'" does not work with it. | 355 | // TODO coloring would be nice, but "...|grep '^!'" does not work with it. | ||
315 | // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!") | 356 | // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!") | ||
316 | // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263 | 357 | // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263 | ||
317 | out << QStringLiteral("%1").arg(mounted ? "+" : "!") | 358 | out << QStringLiteral("%1").arg(mounted ? "+" : "!") | ||
318 | << sep << QStringLiteral("device:%1").arg(id) | 359 | << sep << QStringLiteral("device:%1").arg(id) | ||
319 | << sep << QStringLiteral("[%1:%2]") | 360 | << sep << QStringLiteral("[%1:%2]") | ||
320 | .arg(major(id), 4, 16, QLatin1Char('0')) | 361 | .arg(major(id), 4, 16, QLatin1Char('0')) | ||
321 | .arg(minor(id), 4, 16, QLatin1Char('0')) | 362 | .arg(minor(id), 4, 16, QLatin1Char('0')) | ||
322 | << sep << QStringLiteral("indexed-items:%1").arg(it.value()); | 363 | << sep << QStringLiteral("indexed-items:%1").arg(it.value()); | ||
323 | 364 | | |||
324 | if (mounted) { | 365 | if (mounted) { | ||
325 | out | 366 | out << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData()) | ||
326 | << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData()) | | |||
327 | << sep << QStringLiteral("device:%1").arg(info.device().constData()) | 367 | << sep << QStringLiteral("device:%1").arg(info.device().constData()) | ||
328 | << sep << QStringLiteral("path:%1").arg(info.rootPath()) | 368 | << sep << QStringLiteral("path:%1").arg(info.rootPath()) | ||
329 | ; | 369 | ; | ||
330 | } | 370 | } | ||
331 | // TODO: see above | 371 | // TODO: see above | ||
332 | // out << QStringLiteral("\033[0m") << endl; | 372 | // out << QStringLiteral("\033[0m") << endl; | ||
333 | out << endl; | 373 | out << endl; | ||
334 | } | 374 | } | ||
335 | 375 | | |||
336 | err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl; | 376 | err << i18n("Total: %1, Ignored: %2; Mounted: %3, Not mounted: %4", | ||
377 | summary.total, | ||||
378 | summary.ignored, | ||||
379 | summary.accessible, | ||||
380 | summary.total - (summary.ignored + summary.accessible)) | ||||
381 | << endl; | ||||
337 | } | 382 | } | ||
338 | 383 | | |||
339 | void DatabaseSanitizer::removeStaleEntries(const QVector<qint64>& deviceIds, | 384 | void DatabaseSanitizer::removeStaleEntries(const QVector<qint64>& deviceIds, | ||
340 | const DatabaseSanitizer::ItemAccessFilters accessFilter, | 385 | const DatabaseSanitizer::ItemAccessFilters accessFilter, | ||
341 | const bool dryRun, | 386 | const bool dryRun, | ||
342 | const QSharedPointer<QRegularExpression>& urlFilter) | 387 | const QSharedPointer<QRegularExpression>& urlFilter) | ||
343 | { | 388 | { | ||
344 | auto listResult = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter); | 389 | // To capture files with wrong device ids, we must not filter by | ||
390 | // accessibility, because those are false positives. | ||||
391 | auto listResult = m_pimpl->createList(deviceIds, IgnoreNone, urlFilter); | ||||
392 | auto& summary = listResult.second; | ||||
345 | 393 | | |||
346 | const auto ignoredDevices = m_pimpl->deviceFilters(listResult.first, accessFilter); | 394 | // Same here: Filter only by mount state of drives | ||
395 | auto diff = m_pimpl->removeIgnored(listResult.first, accessFilter & DeviceMask); | ||||
396 | summary.ignored += diff.ignored; | ||||
397 | summary.accessible -= diff.accessible; | ||||
347 | 398 | | |||
348 | const auto sep = QLatin1Char(' '); | 399 | const auto sep = QLatin1Char(' '); | ||
349 | auto& summary = listResult.second; | | |||
350 | QTextStream out(stdout); | 400 | QTextStream out(stdout); | ||
351 | QTextStream err(stderr); | 401 | QTextStream err(stderr); | ||
352 | for (const auto& info: listResult.first) { | 402 | for (const auto& info: listResult.first) { | ||
353 | if (ignoredDevices[info.deviceId] == true) { | 403 | if (info.accessible && m_pimpl->isMounted(info.deviceId)) { | ||
354 | summary.ignored++; | 404 | summary.ignored++; | ||
355 | } else { | 405 | summary.accessible--; | ||
406 | continue; | ||||
407 | } | ||||
356 | if (info.isSymLink) { | 408 | if (info.isSymLink) { | ||
409 | // Ignore symlinks! It's impossible to obtain the device id of the target. | ||||
357 | out << i18n("IgnoredSymbolicLink:"); | 410 | out << i18n("IgnoredSymbolicLink:"); | ||
358 | summary.ignored++; | 411 | summary.ignored++; | ||
359 | } else { | 412 | summary.accessible--; | ||
360 | m_pimpl->removeDocument(info.id); | 413 | continue; | ||
361 | out << i18n("Removing:"); | | |||
362 | } | 414 | } | ||
415 | | ||||
416 | m_pimpl->removeRecursively(info.id); | ||||
417 | out << i18n("Removing:"); | ||||
363 | out << sep << QStringLiteral("device: %1").arg(info.deviceId) | 418 | out << sep << QStringLiteral("device: %1").arg(info.deviceId) | ||
364 | << sep << QStringLiteral("inode: %1").arg(info.inode) | 419 | << sep << QStringLiteral("inode: %1").arg(info.inode) | ||
365 | << sep << QStringLiteral("url: %1").arg(info.url) | 420 | << sep << QStringLiteral("url: %1").arg(info.url) | ||
366 | << endl; | 421 | << endl; | ||
367 | } | 422 | } | ||
368 | } | | |||
369 | if (dryRun) { | 423 | if (dryRun) { | ||
370 | m_pimpl->abort(); | 424 | m_pimpl->abort(); | ||
371 | } else { | 425 | } else { | ||
372 | m_pimpl->commit(); | 426 | m_pimpl->commit(); | ||
373 | } | 427 | } | ||
374 | Q_ASSERT(summary.accessible == 0); | 428 | err << i18nc("numbers", "Removed: %1 (Invalid device ids: %2), Total: %3, Ignored: %4, ", | ||
375 | err << i18nc("numbers", "Removed: %1, Total: %2, Ignored: %3", | | |||
376 | summary.total - summary.ignored, | 429 | summary.total - summary.ignored, | ||
430 | summary.accessible, | ||||
377 | summary.total, | 431 | summary.total, | ||
378 | summary.ignored) | 432 | summary.ignored | ||
433 | ) | ||||
379 | << endl; | 434 | << endl; | ||
380 | } | 435 | } |
43 is the correct id for a share. The path is wrong.
46 is a wrong id for the same share. It should not be listed. The path however is correct.
QStorageInfo is not enough, I'm afraid. For proper matching mtab must be read.