diff --git a/src/file/extractor/app.cpp b/src/file/extractor/app.cpp index ea2b0d69..95e44a8f 100644 --- a/src/file/extractor/app.cpp +++ b/src/file/extractor/app.cpp @@ -1,221 +1,205 @@ /* * This file is part of the KDE Baloo Project * Copyright (C) 2013-2015 Vishesh Handa * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) version 3, or any * later version accepted by the membership of KDE e.V. (or its * successor approved by the membership of KDE e.V.), which shall * act as a proxy defined in Section 6 of version 3 of the license. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #include "app.h" #include "basicindexingjob.h" #include "result.h" #include "idutils.h" #include "transaction.h" #include "baloodebug.h" #include "global.h" #include #include #include #include #include #include #include #include //for STDIN_FILENO #include using namespace Baloo; App::App(QObject* parent) : QObject(parent) , m_notifyNewData(STDIN_FILENO, QSocketNotifier::Read) , m_input() , m_inputStream(&m_input) , m_outputStream(stdout) , m_tr(nullptr) { m_input.open(STDIN_FILENO, QIODevice::ReadOnly | QIODevice::Unbuffered ); m_inputStream.setByteOrder(QDataStream::BigEndian); connect(&m_notifyNewData, &QSocketNotifier::activated, this, &App::slotNewInput); } void App::slotNewInput() { - quint32 nIds; - quint64 id; - char buf[8]; - m_inputStream.startTransaction(); - m_inputStream.readRawData(buf, 4); + m_inputStream >> m_ids; if (m_inputStream.status() != QDataStream::Ok) { QCoreApplication::quit(); return; } - memcpy(&nIds, buf, 4); - for (quint32 i = 0; i < nIds; i++) { - - m_inputStream.readRawData(buf, 8); - if (m_inputStream.status() != QDataStream::Ok) { - QCoreApplication::quit(); - return; - } - memcpy(&id, buf, 8); - m_ids.append(id); - } - if (!m_inputStream.commitTransaction()) { m_ids.clear(); return; } Database *db = globalDatabaseInstance(); if (!db->open(Database::ReadWriteDatabase)) { qCritical() << "Failed to open the database"; exit(1); } Q_ASSERT(m_tr == nullptr); m_tr = new Transaction(db, Transaction::ReadWrite); // FIXME: The transaction is open for way too long. We should just open it for when we're // committing the data not during the extraction. QTimer::singleShot(0, this, &App::processNextFile); /** * A Single Batch seems to be triggering the SocketNotifier more than once * so we disable it till the batch is done. */ m_notifyNewData.setEnabled(false); } void App::processNextFile() { if (!m_ids.isEmpty()) { int delay = m_idleMonitor.isIdle() ? 0 : 10; quint64 id = m_ids.takeFirst(); QString url = QFile::decodeName(m_tr->documentUrl(id)); if (url.isEmpty() || !QFile::exists(url)) { m_tr->removeDocument(id); QTimer::singleShot(0, this, &App::processNextFile); return; } m_outputStream << "S " << url << endl; index(m_tr, url, id); m_outputStream << "F " << url << endl; m_updatedFiles << url; QTimer::singleShot(delay, this, &App::processNextFile); } else { m_tr->commit(); delete m_tr; m_tr = nullptr; /* * TODO we're already sending out each file as we start we can simply send out a done * signal isntead of sending out the list of files, that will need changes in whatever * uses this signal, Dolphin I think? */ QDBusMessage message = QDBusMessage::createSignal(QStringLiteral("/files"), QStringLiteral("org.kde"), QStringLiteral("changed")); QVariantList vl; vl.reserve(1); vl << QVariant(m_updatedFiles); m_updatedFiles.clear(); message.setArguments(vl); QDBusConnection::sessionBus().send(message); // Enable the SocketNotifier for the next batch m_notifyNewData.setEnabled(true); m_outputStream << "B" << endl; } } void App::index(Transaction* tr, const QString& url, quint64 id) { QString mimetype = m_mimeDb.mimeTypeForFile(url).name(); qCDebug(BALOO) << "Indexing" << id << url << mimetype; if (!m_config.shouldBeIndexed(url)) { // FIXME: This should never be happening! qCWarning(BALOO) << "Found" << url << "in the ContentIndexingDB, although it should be skipped"; tr->removeDocument(id); } // The initial BasicIndexingJob run has been supplied with the file extension // mimetype only, skip based on the "real" mimetype if (!m_config.shouldMimeTypeBeIndexed(mimetype)) { qCDebug(BALOO) << "Skipping" << url << "- mimetype:" << mimetype; tr->removePhaseOne(id); return; } // HACK: Also, we're ignoring ttext files which are greater tha 10 Mb as we // have trouble processing them // if (mimetype.startsWith(QStringLiteral("text/"))) { QFileInfo fileInfo(url); if (fileInfo.size() >= 10 * 1024 * 1024) { tr->removePhaseOne(id); return; } } // We always run the basic indexing again. This is mostly so that the proper // mimetype is set and we get proper type information. // The mimetype fetched in the BasicIndexingJob is fast but not accurate BasicIndexingJob basicIndexer(url, mimetype, BasicIndexingJob::NoLevel); basicIndexer.index(); Baloo::Document doc = basicIndexer.document(); Result result(url, mimetype, KFileMetaData::ExtractionResult::ExtractEverything); result.setDocument(doc); QList exList = m_extractorCollection.fetchExtractors(mimetype); Q_FOREACH (KFileMetaData::Extractor* ex, exList) { ex->extract(&result); } result.finish(); if (doc.id() != id) { qWarning() << url << "id seems to have changed. Perhaps baloo was not running, and this file was deleted + re-created"; tr->removeDocument(id); if (!tr->hasDocument(doc.id())) { tr->addDocument(result.document()); } else { tr->replaceDocument(result.document(), DocumentTerms | DocumentData); } } else { tr->replaceDocument(result.document(), DocumentTerms | DocumentData); } tr->removePhaseOne(doc.id()); } diff --git a/src/file/extractorprocess.cpp b/src/file/extractorprocess.cpp index 14dda763..c52c7e04 100644 --- a/src/file/extractorprocess.cpp +++ b/src/file/extractorprocess.cpp @@ -1,103 +1,97 @@ /* * Copyright (C) 2015 Vishesh Handa * Copyright (C) 2015 Pinak Ahuja * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ #include "extractorprocess.h" #include #include +#include using namespace Baloo; ExtractorProcess::ExtractorProcess(QObject* parent) : QObject(parent) , m_extractorPath(QStandardPaths::findExecutable(QStringLiteral("baloo_file_extractor"))) , m_extractorProcess(this) { connect(&m_extractorProcess, &QProcess::readyRead, this, &ExtractorProcess::slotIndexingFile); connect(&m_extractorProcess, QOverload::of(&QProcess::finished), [=](int exitCode, QProcess::ExitStatus exitStatus) { if (exitStatus == QProcess::CrashExit) { Q_EMIT failed(); } }); m_extractorProcess.setProgram(m_extractorPath); m_extractorProcess.setProcessChannelMode(QProcess::ForwardedErrorChannel); m_extractorProcess.start(); } ExtractorProcess::~ExtractorProcess() { m_extractorProcess.close(); } void ExtractorProcess::start() { m_extractorProcess.start(QIODevice::Unbuffered | QIODevice::ReadWrite); m_extractorProcess.waitForStarted(); m_extractorProcess.setReadChannel(QProcess::StandardOutput); } void ExtractorProcess::index(const QVector& fileIds) { Q_ASSERT(m_extractorProcess.state() == QProcess::Running); Q_ASSERT(!fileIds.isEmpty()); - QByteArray batchData; - - quint32 batchSize = fileIds.size(); - batchData.append(reinterpret_cast(&batchSize), sizeof(quint32)); - for (quint64 id : fileIds) { - batchData.append(reinterpret_cast(&id), sizeof(quint64)); - } - - m_extractorProcess.write(batchData.data(), batchData.size()); + QDataStream batch(&m_extractorProcess); + batch << fileIds; } void ExtractorProcess::slotIndexingFile() { while (m_extractorProcess.canReadLine()) { QString line = m_extractorProcess.readLine().trimmed(); if (line.isEmpty()) { continue; } char command = line[0].toLatin1(); QString arg = line.mid(2); switch (command) { case 'S': Q_EMIT startedIndexingFile(arg); break; case 'F': Q_EMIT finishedIndexingFile(arg); break; case 'B': Q_EMIT done(); break; default: qCritical() << "Got unknown result from extractor" << command << arg; } } }