Changeset View
Standalone View
src/engine/databasesanitizer.cpp
Show All 38 Lines | |||||
39 | 39 | | |||
40 | public: | 40 | public: | ||
41 | /** | 41 | /** | ||
42 | * \brief Basic info about database items | 42 | * \brief Basic info about database items | ||
43 | */ | 43 | */ | ||
44 | struct FileInfo { | 44 | struct FileInfo { | ||
45 | quint32 deviceId = 0; | 45 | quint32 deviceId = 0; | ||
46 | quint32 inode = 0; | 46 | quint32 inode = 0; | ||
47 | quint64 id = 0; | ||||
47 | QString url = QString(); | 48 | QString url = QString(); | ||
49 | QString symlink = QString(); | ||||
48 | bool accessible = true; | 50 | bool accessible = true; | ||
49 | }; | 51 | }; | ||
50 | 52 | | |||
51 | void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const | 53 | void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const | ||
52 | { | 54 | { | ||
53 | if (cur % step == 0) { | 55 | if (cur % step == 0) { | ||
54 | out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16); | 56 | out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16); | ||
55 | out.flush(); | 57 | out.flush(); | ||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Line(s) | 79 | { | |||
97 | } | 99 | } | ||
98 | 100 | | |||
99 | QTextStream err(stderr); | 101 | QTextStream err(stderr); | ||
100 | for (quint64 id: keys) { | 102 | for (quint64 id: keys) { | ||
101 | printProgress(err, i, max, 100); | 103 | printProgress(err, i, max, 100); | ||
102 | 104 | | |||
103 | const quint32* arr = reinterpret_cast<quint32*>(&id); | 105 | const quint32* arr = reinterpret_cast<quint32*>(&id); | ||
104 | const auto url = docUrlDb.get(id); | 106 | const auto url = docUrlDb.get(id); | ||
107 | const auto fileInfo = QFileInfo(url); | ||||
105 | FileInfo info; | 108 | FileInfo info; | ||
109 | info.id = id; | ||||
106 | info.deviceId = arr[0]; | 110 | info.deviceId = arr[0]; | ||
107 | info.inode = arr[1]; | 111 | info.inode = arr[1]; | ||
108 | info.url = url; | 112 | info.url = url; | ||
109 | info.accessible = !url.isEmpty() && QFileInfo::exists(url); | 113 | info.accessible = !url.isEmpty() && fileInfo.exists(); | ||
114 | info.symlink = fileInfo.isSymLink() | ||||
115 | ? fileInfo.symLinkTarget() | ||||
116 | : QString(); | ||||
117 | | ||||
bruns: I think if you initialize fileinfo anyway, you should use fileinfo.exists() ... | |||||
110 | if ((!includeIds.isEmpty() && !includeIds.contains(info.deviceId)) | 118 | if ((!includeIds.isEmpty() && !includeIds.contains(info.deviceId)) | ||
111 | || (!excludeIds.isEmpty() && excludeIds.contains(info.deviceId)) | 119 | || (!excludeIds.isEmpty() && excludeIds.contains(info.deviceId)) | ||
112 | || (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) | 120 | || (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) | ||
113 | || (urlFilter && !urlFilter->match(info.url).hasMatch()) | 121 | || (urlFilter && !urlFilter->match(info.url).hasMatch()) | ||
114 | ) { | 122 | ) { | ||
115 | continue; | 123 | continue; | ||
116 | } | 124 | } | ||
117 | result.append(info); | 125 | result.append(info); | ||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Line(s) | 164 | { | |||
164 | const auto keys = usedDevices.uniqueKeys(); | 172 | const auto keys = usedDevices.uniqueKeys(); | ||
165 | for (const auto& dev : keys) { | 173 | for (const auto& dev : keys) { | ||
166 | auto devinfo = getDeviceInfo(dev); | 174 | auto devinfo = getDeviceInfo(dev); | ||
167 | if ((devinfo.mounted && !(accessFilter & DatabaseSanitizer::IgnoreMounted)) | 175 | if ((devinfo.mounted && !(accessFilter & DatabaseSanitizer::IgnoreMounted)) | ||
168 | || (!devinfo.mounted && !(accessFilter & DatabaseSanitizer::IgnoreUnmounted)) | 176 | || (!devinfo.mounted && !(accessFilter & DatabaseSanitizer::IgnoreUnmounted)) | ||
169 | ) { | 177 | ) { | ||
170 | devinfo.items = usedDevices.values(dev).count(); | 178 | devinfo.items = usedDevices.values(dev).count(); | ||
171 | result.append(devinfo); | 179 | result.append(devinfo); | ||
180 | } else { | ||||
181 | qDebug() << "Ignoring device" << devinfo.id; | ||||
172 | } | 182 | } | ||
173 | } | 183 | } | ||
184 | return result; | ||||
185 | } | ||||
186 | | ||||
187 | struct IdInfo { | ||||
188 | quint32 deviceId = 0; | ||||
189 | quint32 inode = 0; | ||||
190 | quint64 id = 0; | ||||
191 | }; | ||||
174 | 192 | | |||
193 | IdInfo toIdInfo(quint64 id) { | ||||
194 | IdInfo result; | ||||
195 | const quint32* arr = reinterpret_cast<quint32*>(&id); | ||||
196 | result.deviceId = arr[0]; | ||||
197 | result.inode = arr[1]; | ||||
198 | result.id = id; | ||||
175 | return result; | 199 | return result; | ||
176 | } | 200 | } | ||
177 | 201 | | |||
178 | private: | | |||
179 | Transaction* m_transaction; | 202 | Transaction* m_transaction; | ||
180 | }; | 203 | }; | ||
181 | } | 204 | } | ||
182 | 205 | | |||
183 | using namespace Baloo; | 206 | using namespace Baloo; | ||
184 | 207 | | |||
185 | 208 | | |||
186 | DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type) | 209 | DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type) | ||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Line(s) | 273 | for (const auto& dev : usedDevices) { | |||
267 | } | 290 | } | ||
268 | // TODO: see above | 291 | // TODO: see above | ||
269 | // out << QStringLiteral("\033[0m") << endl; | 292 | // out << QStringLiteral("\033[0m") << endl; | ||
270 | out << endl; | 293 | out << endl; | ||
271 | } | 294 | } | ||
272 | 295 | | |||
273 | err << i18n("Found %1 matching in %2 devices", matchCount, usedDevices.count()) << endl; | 296 | err << i18n("Found %1 matching in %2 devices", matchCount, usedDevices.count()) << endl; | ||
274 | } | 297 | } | ||
298 | | ||||
299 | void DatabaseSanitizer::removeStaleEntries(const QVector<qint64>& deviceIds, | ||||
300 | const DatabaseSanitizer::ItemAccessFilters accessFilter, | ||||
301 | const bool dryRun, | ||||
302 | const QSharedPointer<QRegularExpression>& urlFilter) | ||||
303 | { | ||||
304 | auto infos = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter); | ||||
305 | auto devices = m_pimpl->createDeviceList(infos, accessFilter); | ||||
306 | QVector<quint32> deviceIdFilter; | ||||
307 | for (const auto& devInfo: devices) { | ||||
308 | deviceIdFilter.append(devInfo.id); | ||||
309 | } | ||||
310 | const auto sep = QLatin1Char(' '); | ||||
311 | QTextStream out(stdout); | ||||
312 | QTextStream err(stderr); | ||||
313 | for (const auto& info: infos) { | ||||
314 | if (deviceIdFilter.contains(info.deviceId)) { | ||||
315 | if (!info.symlink.isEmpty()) { | ||||
316 | // Do not remove symlinks pointing to unmounted devices | ||||
bruns: I think it is better to use QT_FSTAT(info.symlink.toLocal8Bit().constData(), ...) here, avoids… | |||||
michaelh: I've tried it. Sadly, your suggestion does not work. With `fi = filePathToStat(info.symlink.toLocal8Bit())`, `fi._st_dev` is 0 when the symlink target does not exist. Hence it does not tell me why the link can't be followed. Baloo's db, on the other hand, knows about this. As deviceIdFilter never contains 0, symlinks would be removed when they should be ignored. With `if (id != 0)`, truly "dead" symlinks will not be removed. filePathToStat returns a QT_STATBUF. My guess is that this is essentially the same as QT_FSTAT; at least the fstat(2) man page says so. | |||||
bruns: What do you consider a "truly dead" symlink? If you really want to do it correctly, you have to walk the file system yourself, one symlink target path component at a time - each path component can be a symlink itself, or a mount point. You should check the device id for *every* path component. | |||||
michaelh: Damn, you're right! I was fooled by this:
```
$ balooctl index /mnt/otto/test.mp4
$ ln -s /mnt/otto/test.mp4 ~/Videos/
```
Baloo now also indexes ~/Videos/test.mp4. I did not notice because every indexed symbolic link on my system also has its target indexed. I never saw id == 0. | |||||
317 | const auto id = m_pimpl->m_transaction->documentId(info.symlink.toLocal8Bit()); | ||||
bruns: Missing check id != 0, or the equivalent if the above code is changed to QT_FSTAT. | |||||
318 | const auto idInfo = m_pimpl->toIdInfo(id); | ||||
319 | if (!deviceIdFilter.contains(idInfo.deviceId)) { | ||||
320 | qDebug() << "points to device" << idInfo.deviceId << "skipping" << info.url; | ||||
321 | continue; | ||||
322 | } | ||||
323 | } | ||||
324 | | ||||
325 | m_pimpl->m_transaction->removeDocument(info.id); | ||||
326 | out << i18n("Removing:") | ||||
327 | << sep << QStringLiteral("device: %1").arg(info.deviceId) | ||||
328 | << sep << QStringLiteral("inode: %1").arg(info.inode) | ||||
329 | << sep << QStringLiteral("url: %1").arg(info.url) | ||||
330 | << endl; | ||||
331 | } | ||||
332 | } | ||||
333 | if (dryRun) { | ||||
bruns: Why not just make the removeDocument transaction above depend on dryRun? removeDocument is expensive ... | |||||
michaelh: I did that before, but decided on `--dry-run` to simulate the process as closely as possible. | |||||
bruns: Fair reason, the comment below is also mood then ... | |||||
michaelh: 8 meanings of MOOD acronym or abbreviation. MOOD stands for:
- Magic of Ordinary Days
- Meet only Original Designs
- Michigan Out of Doors
- Movie Organizer Online Database
- Miata Owners on Delmarva
- Methodology for Object Oriented Design
- Metrics for Object Oriented Design
- Museum of Outstanding Design
typo? :-) | |||||
bruns: s/mood/moot/ | |||||
334 | m_pimpl->m_transaction->abort(); | ||||
335 | } else { | ||||
336 | m_pimpl->m_transaction->commit(); | ||||
337 | } | ||||
338 | err << i18n("Removed %1 items", infos.count()) << endl; | ||||
339 | } |
bruns: I think if you initialize fileinfo anyway, you should use fileinfo.exists() ...