diff --git a/src/tm/jobs.cpp b/src/tm/jobs.cpp index 0351b38..dc9f2a2 100644 --- a/src/tm/jobs.cpp +++ b/src/tm/jobs.cpp @@ -1,2125 +1,2123 @@ /* **************************************************************************** This file is part of Lokalize Copyright (C) 2007-2014 by Nick Shaforostoff This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License or (at your option) version 3 or any later version accepted by the membership of KDE e.V. (or its successor approved by the membership of KDE e.V.), which shall act as a proxy defined in Section 14 of version 3 of the license. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . **************************************************************************** */ #include "jobs.h" #include "lokalize_debug.h" #include "catalog.h" #include "project.h" #include "diff.h" #include "prefs_lokalize.h" #include "version.h" #include "stemming.h" #include #include #include #include #include #include #include #include #include #include #include using namespace TM; QThreadPool* TM::threadPool() { static QThreadPool* inst = new QThreadPool; return inst; } #ifdef Q_OS_WIN #define U QLatin1String #else #define U QStringLiteral #endif #define TM_DELIMITER '\v' #define TM_SEPARATOR '\b' #define TM_NOTAPPROVED 0x04 static bool stop = false; void TM::cancelAllJobs() { stop = true; } static qlonglong newTMSourceEntryCount = 0; static qlonglong reusedTMSourceEntryCount = 0; /** * splits string into words, removing any markup * * TODO segmentation by sentences... **/ static void doSplit(QString& cleanEn, QStringList& words, QRegExp& rxClean1, const QString& accel ) { static QRegExp rxSplit(QStringLiteral("\\W+|\\d+")); if (!rxClean1.pattern().isEmpty()) cleanEn.replace(rxClean1, QStringLiteral(" ")); cleanEn.remove(accel); words = cleanEn.toLower().split(rxSplit, QString::SkipEmptyParts); if (words.size() > 4) { int i = 0; for (; i < words.size(); ++i) { if (words.at(i).size() < 4) words.removeAt(i--); else if (words.at(i).startsWith('t') && words.at(i).size() == 4) { if (words.at(i) == QLatin1String("then") || words.at(i) == QLatin1String("than") || words.at(i) == QLatin1String("that") || words.at(i) == QLatin1String("this") ) words.removeAt(i--); } } } } static qlonglong getFileId(const QString& path, QSqlDatabase& db) { QSqlQuery query1(db); QString escapedPath = path; escapedPath.replace(QLatin1Char('\''), QLatin1String("''")); QString pathExpr = QStringLiteral("path='") % escapedPath % '\''; if (path.isEmpty()) pathExpr = QStringLiteral("path ISNULL"); if (Q_UNLIKELY(!query1.exec(U("SELECT id FROM files WHERE " "path='") % escapedPath % '\''))) qCWarning(LOKALIZE_LOG) << "select db error: " << query1.lastError().text(); if (Q_LIKELY(query1.next())) { //this is translation of en string that is already present in db qlonglong id = query1.value(0).toLongLong(); query1.clear(); return id; } query1.clear(); //nope, this is new file bool qpsql = (db.driverName() == QLatin1String("QPSQL")); QString sql = QStringLiteral("INSERT INTO files (path) VALUES (?)"); if (qpsql) sql += QLatin1String(" RETURNING id"); query1.prepare(sql); query1.bindValue(0, path); if (Q_LIKELY(query1.exec())) return qpsql ? (query1.next(), query1.value(0).toLongLong()) : query1.lastInsertId().toLongLong(); else qCWarning(LOKALIZE_LOG) << "insert db error: " << query1.lastError().text(); return -1; } static void addToIndex(qlonglong sourceId, QString sourceString, QRegExp& rxClean1, const QString& accel, QSqlDatabase& db) { QStringList words; doSplit(sourceString, words, rxClean1, accel); if (Q_UNLIKELY(words.isEmpty())) return; QSqlQuery query1(db); QByteArray sourceIdStr = QByteArray::number(sourceId, 36); bool isShort = words.size() < 20; int j = words.size(); while (--j >= 0) { // insert word (if we do not have it) if (Q_UNLIKELY(!query1.exec(U("SELECT word, ids_short, ids_long FROM words WHERE " "word='") % words.at(j) % '\''))) qCWarning(LOKALIZE_LOG) << "select error 3: " << query1.lastError().text(); //we _have_ it bool weHaveIt = query1.next(); if (weHaveIt) { //just add new id QByteArray arr; QString field; if (isShort) { arr = query1.value(1).toByteArray(); field = QStringLiteral("ids_short"); } else { arr = query1.value(2).toByteArray(); field = QStringLiteral("ids_long"); } query1.clear(); if (arr.contains(' ' % sourceIdStr % ' ') || arr.startsWith(sourceIdStr + ' ') || arr.endsWith(' ' + sourceIdStr) || arr == sourceIdStr) return;//this string is already indexed query1.prepare(QStringLiteral("UPDATE words SET ") % field % QStringLiteral("=? WHERE word='") % words.at(j) % '\''); if (!arr.isEmpty()) arr += ' '; arr += sourceIdStr; query1.bindValue(0, arr); if (Q_UNLIKELY(!query1.exec())) qCWarning(LOKALIZE_LOG) << "update error 4: " << query1.lastError().text(); } else { query1.clear(); query1.prepare(QStringLiteral("INSERT INTO words (word, ids_short, ids_long) VALUES (?, ?, ?)")); QByteArray idsShort; QByteArray idsLong; if (isShort) idsShort = sourceIdStr; else idsLong = sourceIdStr; query1.bindValue(0, words.at(j)); query1.bindValue(1, idsShort); query1.bindValue(2, idsLong); if (Q_UNLIKELY(!query1.exec())) qCWarning(LOKALIZE_LOG) << "insert error 2: " << query1.lastError().text() ; } } } /** * remove source string from index if there are no other * 'good' entries using it but the entry specified with mainId */ static void removeFromIndex(qlonglong mainId, qlonglong sourceId, QString sourceString, QRegExp& rxClean1, const QString& accel, QSqlDatabase& db) { QStringList words; doSplit(sourceString, words, rxClean1, accel); if (Q_UNLIKELY(words.isEmpty())) return; QSqlQuery query1(db); QByteArray sourceIdStr = QByteArray::number(sourceId, 36); //BEGIN check //TM_NOTAPPROVED=4 if (Q_UNLIKELY(!query1.exec(U("SELECT count(*) FROM main, target_strings WHERE " "main.source=") % QString::number(sourceId) % U(" AND " "main.target=target_strings.id AND " "target_strings.target NOTNULL AND " "main.id!=") % QString::number(mainId) % U(" AND " "(main.bits&4)!=4")))) { qCWarning(LOKALIZE_LOG) << "select error 500: " << query1.lastError().text(); return; } bool exit = query1.next() && (query1.value(0).toLongLong() > 0); query1.clear(); if (exit) return; //END check bool isShort = words.size() < 20; int j = words.size(); while (--j >= 0) { // remove from record for the word (if we do not have it) if (Q_UNLIKELY(!query1.exec(U("SELECT word, ids_short, ids_long FROM words WHERE " "word='") % words.at(j) % '\''))) { qCWarning(LOKALIZE_LOG) << "select error 3: " << query1.lastError().text(); return; } if (!query1.next()) { qCWarning(LOKALIZE_LOG) << "exit here 1"; //we don't have record for the word, so nothing to remove query1.clear(); return; } QByteArray arr; QString field; if (isShort) { arr = query1.value(1).toByteArray(); field = QStringLiteral("ids_short"); } else { arr = query1.value(2).toByteArray(); field = QStringLiteral("ids_long"); } query1.clear(); if (arr.contains(' ' + sourceIdStr + ' ')) arr.replace(' ' + sourceIdStr + ' ', " "); else if (arr.startsWith(sourceIdStr + ' ')) arr.remove(0, sourceIdStr.size() + 1); else if (arr.endsWith(' ' + sourceIdStr)) arr.chop(sourceIdStr.size() + 1); else if (arr == sourceIdStr) arr.clear(); query1.prepare(U("UPDATE words " "SET ") % field % U("=? " "WHERE word='") % words.at(j) % '\''); query1.bindValue(0, arr); if (Q_UNLIKELY(!query1.exec())) qCWarning(LOKALIZE_LOG) << "update error 504: " << query1.lastError().text(); } } static bool doRemoveEntry(qlonglong mainId, QRegExp& rxClean1, const QString& accel, QSqlDatabase& db) { QSqlQuery query1(db); if (Q_UNLIKELY(!query1.exec(U("SELECT source_strings.id, source_strings.source FROM source_strings, main WHERE " "source_strings.id=main.source AND main.id=") + QString::number(mainId)))) return false; if (!query1.next()) return false; qlonglong sourceId = query1.value(0).toLongLong(); QString source_string = query1.value(1).toString(); query1.clear(); if (!query1.exec(QStringLiteral("SELECT count(*) FROM main WHERE source=") + QString::number(sourceId)) || !query1.next()) return false; bool theOnly = query1.value(0).toInt() == 1; query1.clear(); if (theOnly) { removeFromIndex(mainId, sourceId, source_string, rxClean1, accel, db); qCWarning(LOKALIZE_LOG) << "ok delete?" << query1.exec(QStringLiteral("DELETE FROM source_strings WHERE id=") + QString::number(sourceId)); } if (Q_UNLIKELY(!query1.exec(U("SELECT target FROM main WHERE " "main.id=") + QString::number(mainId)) || !query1.next())) return false; qlonglong targetId = query1.value(0).toLongLong(); query1.clear(); if (!query1.exec(QStringLiteral("SELECT count(*) FROM main WHERE target=") + QString::number(targetId)) || ! query1.next()) return false; theOnly = query1.value(0).toInt() == 1; query1.clear(); if (theOnly) query1.exec(QStringLiteral("DELETE FROM target_strings WHERE id=") + QString::number(targetId)); return query1.exec(QStringLiteral("DELETE FROM main WHERE id=") + QString::number(mainId)); } static bool doRemoveFile(const QString& filePath, QSqlDatabase& db) { qlonglong fileId = getFileId(filePath, db); QSqlQuery query1(db); if (Q_UNLIKELY(!query1.exec(U("SELECT id FROM files WHERE " "id=") + QString::number(fileId)))) return false; if (!query1.next()) return false; query1.clear(); query1.exec(QStringLiteral("DELETE source_strings FROM source_strings, main WHERE source_strings.id = main.source AND main.file =") + QString::number(fileId)); query1.exec(QStringLiteral("DELETE target_strings FROM target_strings, main WHERE target_strings.id = main.target AND main.file =") + QString::number(fileId)); query1.exec(QStringLiteral("DELETE FROM main WHERE file = ") + QString::number(fileId)); return query1.exec(QStringLiteral("DELETE FROM files WHERE id=") + QString::number(fileId)); } static int doRemoveMissingFiles(QSqlDatabase& db, const QString& dbName, QObject *job) { int deletedFiles = 0; QSqlQuery query1(db); if (Q_UNLIKELY(!query1.exec(U("SELECT files.path FROM files")))) return false; if (!query1.next()) return false; do { QString filePath = query1.value(0).toString(); if (Project::instance()->isFileMissing(filePath)) { qCWarning(LOKALIZE_LOG) << "Removing file " << filePath << " from translation memory"; RemoveFileJob* job_removefile = new RemoveFileJob(filePath, dbName, job); TM::threadPool()->start(job_removefile, REMOVEFILE); deletedFiles++; } } while (query1.next()); return deletedFiles; } static QString escape(QString str) { return str.replace(QLatin1Char('\''), QStringLiteral("''")); } static bool doInsertEntry(CatalogString source, CatalogString target, const QString& ctxt, //TODO QStringList -- after XLIFF bool approved, qlonglong fileId, QSqlDatabase& db, QRegExp& rxClean1,//cleaning regexps for word index update const QString& accel, qlonglong priorId, qlonglong& mainId ) { QTime a; a.start(); mainId = -1; if (Q_UNLIKELY(source.isEmpty())) { qCWarning(LOKALIZE_LOG) << "source empty"; return false; } bool qpsql = (db.driverName() == QLatin1String("QPSQL")); //we store non-entranslaed entries to make search over all source parts possible bool untranslated = target.isEmpty(); bool shouldBeInIndex = !untranslated && approved; //remove first occurrence of accel character so that search returns words containing accel mark int sourceAccelPos = source.string.indexOf(accel); if (sourceAccelPos != -1) source.string.remove(sourceAccelPos, accel.size()); int targetAccelPos = target.string.indexOf(accel); if (targetAccelPos != -1) target.string.remove(targetAccelPos, accel.size()); //check if we already have record with the same en string QSqlQuery query1(db); QString escapedCtxt = escape(ctxt); QByteArray sourceTags = source.tagsAsByteArray(); QByteArray targetTags = target.tagsAsByteArray(); //BEGIN get sourceId query1.prepare(QString(U("SELECT id FROM source_strings WHERE " "source=? AND (source_accel%1) AND source_markup%2")).arg (sourceAccelPos != -1 ? QStringLiteral("=?") : QStringLiteral("=-1 OR source_accel ISNULL"), sourceTags.isEmpty() ? QStringLiteral(" ISNULL") : QStringLiteral("=?"))); int paranum = 0; query1.bindValue(paranum++, source.string); if (sourceAccelPos != -1) query1.bindValue(paranum++, sourceAccelPos); if (!sourceTags.isEmpty()) query1.bindValue(paranum++, sourceTags); if (Q_UNLIKELY(!query1.exec())) { qCWarning(LOKALIZE_LOG) << "select db source_strings error: " << query1.lastError().text(); return false; } qlonglong sourceId; if (!query1.next()) { //BEGIN insert source anew //qCDebug(LOKALIZE_LOG) <<"insert source anew";; ++newTMSourceEntryCount; QString sql = QStringLiteral("INSERT INTO source_strings (source, source_markup, source_accel) VALUES (?, ?, ?)"); if (qpsql) sql += QLatin1String(" RETURNING id"); query1.clear(); query1.prepare(sql); query1.bindValue(0, source.string); query1.bindValue(1, sourceTags); query1.bindValue(2, sourceAccelPos != -1 ? QVariant(sourceAccelPos) : QVariant()); if (Q_UNLIKELY(!query1.exec())) { qCWarning(LOKALIZE_LOG) << "select db source_strings error: " << query1.lastError().text(); return false; } sourceId = qpsql ? (query1.next(), query1.value(0).toLongLong()) : query1.lastInsertId().toLongLong(); query1.clear(); //update index if (shouldBeInIndex) addToIndex(sourceId, source.string, rxClean1, accel, db); //END insert source anew } else { sourceId = query1.value(0).toLongLong(); ++reusedTMSourceEntryCount; //qCDebug(LOKALIZE_LOG)<<"SOURCE ALREADY PRESENT"< tmConfigCache; static void setConfig(QSqlDatabase& db, const TMConfig& c) { QSqlQuery query(db); query.prepare(QStringLiteral("INSERT INTO tm_config (key, value) VALUES (?, ?)")); query.addBindValue(0); query.addBindValue(c.markup); //qCDebug(LOKALIZE_LOG)<<"setting tm db config:"<setPriority(QThread::IdlePriority); if (m_type == TM::Local) { QSqlDatabase db = QSqlDatabase::addDatabase(QStringLiteral("QSQLITE"), m_dbName); QString dbFolder = QStandardPaths::writableLocation(QStandardPaths::DataLocation); QFileInfo fileInfo(dbFolder); if (!fileInfo.exists(dbFolder)) fileInfo.absoluteDir().mkpath(fileInfo.fileName()); db.setDatabaseName(dbFolder % QLatin1Char('/') % m_dbName % TM_DATABASE_EXTENSION); m_connectionSuccessful = db.open(); if (Q_UNLIKELY(!m_connectionSuccessful)) { qCDebug(LOKALIZE_LOG) << "failed to open db" << db.databaseName() << db.lastError().text(); QSqlDatabase::removeDatabase(m_dbName); emit done(this); return; } if (!initSqliteDb(db)) { //need to recreate db ;( QString filename = db.databaseName(); db.close(); QSqlDatabase::removeDatabase(m_dbName); QFile::remove(filename); db = QSqlDatabase::addDatabase(QStringLiteral("QSQLITE"), m_dbName); db.setDatabaseName(filename); m_connectionSuccessful = db.open() && initSqliteDb(db); if (!m_connectionSuccessful) { QSqlDatabase::removeDatabase(m_dbName); emit done(this); return; } } } else { if (QSqlDatabase::contains(m_dbName)) { //reconnect is true QSqlDatabase::database(m_dbName).close(); QSqlDatabase::removeDatabase(m_dbName); } if (!m_connParams.isFilled()) { QFile rdb(QStandardPaths::writableLocation(QStandardPaths::DataLocation) + QLatin1Char('/') + m_dbName % REMOTETM_DATABASE_EXTENSION); if (!rdb.open(QIODevice::ReadOnly | QIODevice::Text)) { emit done(this); return; } QTextStream rdbParams(&rdb); m_connParams.driver = rdbParams.readLine(); m_connParams.host = rdbParams.readLine(); m_connParams.db = rdbParams.readLine(); m_connParams.user = rdbParams.readLine(); m_connParams.passwd = rdbParams.readLine(); } QSqlDatabase db = QSqlDatabase::addDatabase(m_connParams.driver, m_dbName); db.setHostName(m_connParams.host); db.setDatabaseName(m_connParams.db); db.setUserName(m_connParams.user); db.setPassword(m_connParams.passwd); m_connectionSuccessful = db.open(); if (Q_UNLIKELY(!m_connectionSuccessful)) { QSqlDatabase::removeDatabase(m_dbName); emit done(this); return; } m_connParams.user = db.userName(); initPgDb(db); } } QSqlDatabase db = QSqlDatabase::database(m_dbName); //if (!m_markup.isEmpty()||!m_accel.isEmpty()) if (m_setParams) setConfig(db, m_tmConfig); else m_tmConfig = getConfig(db); qCDebug(LOKALIZE_LOG) << "db" << m_dbName << "opened" << a.elapsed() << m_tmConfig.targetLangCode; getStats(db, m_stat.pairsCount, m_stat.uniqueSourcesCount, m_stat.uniqueTranslationsCount); if (m_type == TM::Local) { db.close(); db.open(); } emit done(this); } CloseDBJob::CloseDBJob(const QString& name) : QObject(), QRunnable() , m_dbName(name) { setAutoDelete(false); } CloseDBJob::~CloseDBJob() { qCDebug(LOKALIZE_LOG) << "closedb dtor" << m_dbName; } void CloseDBJob::run() { if (m_dbName.length()) QSqlDatabase::removeDatabase(m_dbName); emit done(this); } static QString makeAcceledString(QString source, const QString& accel, const QVariant& accelPos) { if (accelPos.isNull()) return source; int accelPosInt = accelPos.toInt(); if (accelPosInt != -1) source.insert(accelPosInt, accel); return source; } SelectJob* TM::initSelectJob(Catalog* catalog, DocPosition pos, QString db, int opt) { SelectJob* job = new SelectJob(catalog->sourceWithTags(pos), catalog->context(pos.entry).first(), catalog->url(), pos, db.isEmpty() ? Project::instance()->projectID() : db); if (opt & Enqueue) { //deletion should be done by receiver, e.g. slotSuggestionsCame() threadPool()->start(job, SELECT); } return job; } SelectJob::SelectJob(const CatalogString& source, const QString& ctxt, const QString& file, const DocPosition& pos, const QString& dbName) : QObject(), QRunnable() , m_source(source) , m_ctxt(ctxt) , m_file(file) , m_dequeued(false) , m_pos(pos) , m_dbName(dbName) { setAutoDelete(false); //qCDebug(LOKALIZE_LOG)<<"selectjob"< invertMap(const QMap& source) { //uses the fact that map has its keys always sorted QMap sortingMap; for (QMap::const_iterator i = source.constBegin(); i != source.constEnd(); ++i) { sortingMap.insertMulti(i.value(), i.key()); } return sortingMap; } //returns true if seen translation with >85% bool SelectJob::doSelect(QSqlDatabase& db, QStringList& words, //QList& entries, bool isShort) { bool qpsql = (db.driverName() == QLatin1String("QPSQL")); QMap occurencies; QVector idsForWord; QSqlQuery queryWords(db); //TODO ??? not sure. make another loop before to create QList< QList > then reorder it by size static const QString queryC[] = {U("SELECT ids_long FROM words WHERE word='%1'"), U("SELECT ids_short FROM words WHERE word='%1'") }; QString queryString = queryC[isShort]; //for each word... int o = words.size(); while (--o >= 0) { //if this is not the first word occurrence, just readd ids for it if (!(!idsForWord.isEmpty() && words.at(o) == words.at(o + 1))) { idsForWord.clear(); queryWords.exec(queryString.arg(words.at(o))); if (Q_UNLIKELY(!queryWords.exec(queryString.arg(words.at(o))))) qCWarning(LOKALIZE_LOG) << "select error: " << queryWords.lastError().text() << endl; if (queryWords.next()) { QByteArray arr(queryWords.value(0).toByteArray()); queryWords.clear(); QList ids(arr.split(' ')); int p = ids.size(); idsForWord.reserve(p); while (--p >= 0) idsForWord.append(ids.at(p).toLongLong(/*bool ok*/0, 36)); } else { queryWords.clear(); continue; } } //qCWarning(LOKALIZE_LOG) <<"SelectJob: idsForWord.size() "<::const_iterator i = idsForWord.constBegin(); i != idsForWord.constEnd(); i++) occurencies[*i]++; //0 is default value } //accels are removed TMConfig c = getConfig(db); QString tmp = c.markup; if (!c.markup.isEmpty()) tmp += '|'; QRegExp rxSplit(QLatin1Char('(') % tmp % QStringLiteral("\\W+|\\d+)+")); QString sourceClean(m_source.string); sourceClean.remove(c.accel); //split m_english for use in wordDiff later--all words are needed so we cant use list we already have QStringList englishList(sourceClean.toLower().split(rxSplit, QString::SkipEmptyParts)); static QRegExp delPart(QStringLiteral("*"), Qt::CaseSensitive, QRegExp::Wildcard); static QRegExp addPart(QStringLiteral("*"), Qt::CaseSensitive, QRegExp::Wildcard); delPart.setMinimal(true); addPart.setMinimal(true); //QList concordanceLevels=sortedUniqueValues(occurencies); //we start from entries with higher word-concordance level QMap concordanceLevelToIds = invertMap(occurencies); if (concordanceLevelToIds.isEmpty()) return false; bool seen85 = false; int limit = 200; auto clit = concordanceLevelToIds.constEnd(); if (concordanceLevelToIds.size()) --clit; if (concordanceLevelToIds.size()) while (--limit >= 0) { if (Q_UNLIKELY(m_dequeued)) break; //for every concordance level qlonglong level = clit.key(); QString joined; while (level == clit.key()) { joined += QString::number(clit.value()) + ','; if (clit == concordanceLevelToIds.constBegin() || --limit < 0) break; --clit; } joined.chop(1); //get records containing current word QSqlQuery queryFetch(U( "SELECT id, source, source_accel, source_markup FROM source_strings WHERE " "source_strings.id IN (") % joined % ')', db); TMEntry e; while (queryFetch.next()) { e.id = queryFetch.value(0).toLongLong(); if (queryFetch.value(3).toByteArray().size()) qCDebug(LOKALIZE_LOG) << "BA" << queryFetch.value(3).toByteArray(); e.source = CatalogString(makeAcceledString(queryFetch.value(1).toString(), c.accel, queryFetch.value(2)), queryFetch.value(3).toByteArray()); if (e.source.string.contains(TAGRANGE_IMAGE_SYMBOL)) { if (!e.source.tags.size()) qCWarning(LOKALIZE_LOG) << "problem:" << queryFetch.value(3).toByteArray().size() << queryFetch.value(3).toByteArray(); } //e.target=queryFetch.value(2).toString(); //QStringList e_ctxt=queryFetch.value(3).toString().split('\b',QString::SkipEmptyParts); //e.date=queryFetch.value(4).toString(); e.markupExpr = c.markup; e.accelExpr = c.accel; e.dbName = db.connectionName(); //BEGIN calc score QString str = e.source.string; str.remove(c.accel); QStringList englishSuggList(str.toLower().split(rxSplit, QString::SkipEmptyParts)); if (englishSuggList.size() > 10 * englishList.size()) continue; //sugg is 'old' --translator has to adapt its translation to 'new'--current QString result = wordDiff(englishSuggList, englishList); //qCWarning(LOKALIZE_LOG) <<"SelectJob: doin "< 1 so we have decreased it, and increased result: / exp(0.014 * float(addLen) * log10(3.0f + addSubStrCount)); if (delLen) { //qCWarning(LOKALIZE_LOG) <<"SelectJob: delLen:"< 8500; if (seen85 && e.score < 6000) continue; //BEGIN fetch rest of the data QString change_author_str; QString authors_table_str; if (qpsql) { //change_author_str=", main.change_author "; change_author_str = QStringLiteral(", pg_user.usename "); authors_table_str = QStringLiteral(" JOIN pg_user ON (pg_user.usesysid=main.change_author) "); } QSqlQuery queryRest(U( "SELECT main.id, main.date, main.ctxt, main.bits, " "target_strings.target, target_strings.target_accel, target_strings.target_markup, " "files.path, main.change_date ") % change_author_str % U( "FROM main JOIN target_strings ON (target_strings.id=main.target) JOIN files ON (files.id=main.file) ") % authors_table_str % U("WHERE " "main.source=") % QString::number(e.id) % U(" AND " "(main.bits&4)!=4 AND " "target_strings.target NOTNULL") , db); //ORDER BY tm_main.id ? queryRest.exec(); //qCDebug(LOKALIZE_LOG)<<"main select error"< sortedEntryList; //to eliminate same targets from different files while (queryRest.next()) { e.id = queryRest.value(0).toLongLong(); e.date = queryRest.value(1).toDate(); e.ctxt = queryRest.value(2).toString(); e.target = CatalogString(makeAcceledString(queryRest.value(4).toString(), c.accel, queryRest.value(5)), queryRest.value(6).toByteArray()); QStringList matchData = queryRest.value(2).toString().split(TM_DELIMITER, QString::KeepEmptyParts); //context|plural e.file = queryRest.value(7).toString(); if (e.target.isEmpty()) continue; e.obsolete = queryRest.value(3).toInt() & 1; e.changeDate = queryRest.value(8).toDate(); if (qpsql) e.changeAuthor = queryRest.value(9).toString(); //BEGIN exact match score++ if (possibleExactMatch) { //"exact" match (case insensitive+w/o non-word characters!) if (m_source.string == e.source.string) e.score = 10000; else e.score = 9900; } if (!m_ctxt.isEmpty() && matchData.size() > 0) { //check not needed? if (matchData.at(0) == m_ctxt) e.score += 33; } //qCWarning(LOKALIZE_LOG)<<"m_pos"< 1) { int form = matchData.at(1).toInt(); //pluralMatches=(form&&form==m_pos.form); if (form && form == (int)m_pos.form) { //qCWarning(LOKALIZE_LOG)<<"this"< hash; int oldCount = m_entries.size(); QMap::const_iterator it = sortedEntryList.constEnd(); if (sortedEntryList.size()) while (true) { --it; const TMEntry& e = it.key(); int& hits = hash[e.target.string]; if (!hits) //0 was default value m_entries.append(e); hits++; if (it == sortedEntryList.constBegin()) break; } for (int i = oldCount; i < m_entries.size(); ++i) m_entries[i].hits = hash.value(m_entries.at(i).target.string); //END fetch rest of the data } queryFetch.clear(); if (clit == concordanceLevelToIds.constBegin()) break; if (seen85) limit = qMin(limit, 100); //be more restrictive for the next concordance levels } return seen85; } void SelectJob::run() { //qCDebug(LOKALIZE_LOG)<<"select started"<setPriority(QThread::IdlePriority); QTime a; a.start(); if (Q_UNLIKELY(!QSqlDatabase::contains(m_dbName))) { emit done(this); return; } QSqlDatabase db = QSqlDatabase::database(m_dbName); if (Q_UNLIKELY(!db.isValid() || !db.isOpen())) { emit done(this); return; } //qCDebug(LOKALIZE_LOG)<<"select started 2"<()); int limit = qMin(Settings::suggCount(), m_entries.size()); int i = m_entries.size(); while (--i >= limit) m_entries.removeLast(); if (Q_UNLIKELY(m_dequeued)) { emit done(this); return; } ++i; while (--i >= 0) { m_entries[i].accelExpr = c.accel; m_entries[i].markupExpr = c.markup; m_entries[i].diff = userVisibleWordDiff(m_entries.at(i).source.string, m_source.string, m_entries.at(i).accelExpr, m_entries.at(i).markupExpr); } emit done(this); } ScanJob::ScanJob(const QString& filePath, const QString& dbName) : QRunnable() , m_filePath(filePath) , m_time(0) , m_added(0) , m_newVersions(0) , m_size(0) , m_dbName(dbName) { qCDebug(LOKALIZE_LOG) << m_dbName << m_filePath; } ScanJob::~ScanJob() { } void ScanJob::run() { if (stop || !QSqlDatabase::contains(m_dbName)) { return; } qCWarning(LOKALIZE_LOG) << "scan job started for" << m_filePath << m_dbName << stop << m_dbName; //QThread::currentThread()->setPriority(QThread::IdlePriority); QTime a; a.start(); QSqlDatabase db = QSqlDatabase::database(m_dbName); if (!db.isOpen()) return; //initSqliteDb(db); TMConfig c = getConfig(db, true); QRegExp rxClean1(c.markup); rxClean1.setMinimal(true); Catalog catalog(0); if (Q_LIKELY(catalog.loadFromUrl(m_filePath, QString(), &m_size, /*no auto save*/true) == 0)) { if (c.targetLangCode != catalog.targetLangCode()) { qCWarning(LOKALIZE_LOG) << "not indexing file because target languages don't match:" << c.targetLangCode << "in TM vs" << catalog.targetLangCode() << "in file"; return; } qlonglong priorId = -1; QSqlQuery queryBegin(QStringLiteral("BEGIN"), db); //qCWarning(LOKALIZE_LOG) <<"queryBegin error: " < #include /** @author Nick Shaforostoff */ class TmxParser : public QXmlDefaultHandler { enum State { //localstate for getting chars into right place null = 0, seg, propContext, propFile, propPluralForm, propApproved }; enum Lang { Source, Target, Null }; public: TmxParser(const QString& dbName); ~TmxParser(); private: bool startDocument(); bool startElement(const QString&, const QString&, const QString&, const QXmlAttributes&); bool endElement(const QString&, const QString&, const QString&); bool characters(const QString&); private: QSqlDatabase db; QRegExp rxClean1; QString accel; int m_hits; CatalogString m_segment[3]; //Lang enum QList m_inlineTags; QString m_context; QString m_pluralForm; QString m_filePath; QString m_approvedString; State m_state: 8; Lang m_lang: 8; ushort m_added; QMap m_fileIds; QString m_dbLangCode; }; TmxParser::TmxParser(const QString& dbName) : m_hits(0) , m_state(null) , m_lang(Null) , m_added(0) , m_dbLangCode(Project::instance()->langCode().toLower()) { db = QSqlDatabase::database(dbName); TMConfig c = getConfig(db); rxClean1.setPattern(c.markup); rxClean1.setMinimal(true); accel = c.accel; } bool TmxParser::startDocument() { //initSqliteDb(db); m_fileIds.clear(); QSqlQuery queryBegin(QLatin1String("BEGIN"), db); m_state = null; m_lang = Null; return true; } TmxParser::~TmxParser() { QSqlQuery queryEnd(QLatin1String("END"), db); } bool TmxParser::startElement(const QString&, const QString&, const QString& qName, const QXmlAttributes& attr) { if (qName == QLatin1String("tu")) { bool ok; m_hits = attr.value(QLatin1String("usagecount")).toInt(&ok); if (!ok) m_hits = -1; m_segment[Source].clear(); m_segment[Target].clear(); m_context.clear(); m_pluralForm.clear(); m_filePath.clear(); m_approvedString.clear(); } else if (qName == QLatin1String("tuv")) { QString attrLang = attr.value(QStringLiteral("xml:lang")).toLower(); if (attrLang == QLatin1String("en")) //TODO startsWith? m_lang = Source; else if (attrLang == m_dbLangCode) m_lang = Target; else { qCWarning(LOKALIZE_LOG) << "skipping lang" << attr.value("xml:lang"); m_lang = Null; } } else if (qName == QLatin1String("prop")) { QString attrType = attr.value(QStringLiteral("type")).toLower(); if (attrType == QLatin1String("x-context")) m_state = propContext; else if (attrType == QLatin1String("x-file")) m_state = propFile; else if (attrType == QLatin1String("x-pluralform")) m_state = propPluralForm; else if (attrType == QLatin1String("x-approved")) m_state = propApproved; else m_state = null; } else if (qName == QLatin1String("seg")) { m_state = seg; } else if (m_state == seg && m_lang != Null) { InlineTag::InlineElement t = InlineTag::getElementType(qName.toLatin1()); if (t != InlineTag::_unknown) { m_segment[m_lang].string += QChar(TAGRANGE_IMAGE_SYMBOL); int pos = m_segment[m_lang].string.size(); m_inlineTags.append(InlineTag(pos, pos, t, attr.value(QStringLiteral("id")))); } } return true; } bool TmxParser::endElement(const QString&, const QString&, const QString& qName) { if (qName == QLatin1String("tu")) { if (m_filePath.isEmpty()) m_filePath = QLatin1String("tmx-import"); if (!m_fileIds.contains(m_filePath)) m_fileIds.insert(m_filePath, getFileId(m_filePath, db)); qlonglong fileId = m_fileIds.value(m_filePath); if (!m_pluralForm.isEmpty()) m_context += TM_DELIMITER + m_pluralForm; qlonglong priorId = -1; bool ok = doInsertEntry(m_segment[Source], m_segment[Target], m_context, m_approvedString != QLatin1String("no"), fileId, db, rxClean1, accel, priorId, priorId); if (Q_LIKELY(ok)) ++m_added; } else if (m_state == seg && m_lang != Null) { InlineTag::InlineElement t = InlineTag::getElementType(qName.toLatin1()); if (t != InlineTag::_unknown) { InlineTag tag = m_inlineTags.takeLast(); qCWarning(LOKALIZE_LOG) << qName << tag.getElementName(); if (tag.isPaired()) { tag.end = m_segment[m_lang].string.size(); m_segment[m_lang].string += QChar(TAGRANGE_IMAGE_SYMBOL); } m_segment[m_lang].tags.append(tag); } } m_state = null; return true; } bool TmxParser::characters(const QString& ch) { if (m_state == seg && m_lang != Null) m_segment[m_lang].string += ch; else if (m_state == propFile) m_filePath += ch; else if (m_state == propContext) m_context += ch; else if (m_state == propPluralForm) m_pluralForm += ch; else if (m_state == propApproved) m_approvedString += ch; return true; } ImportTmxJob::ImportTmxJob(const QString& filename, const QString& dbName) : QRunnable() , m_filename(filename) , m_time(0) , m_dbName(dbName) { } ImportTmxJob::~ImportTmxJob() { qCWarning(LOKALIZE_LOG) << "ImportTmxJob dtor"; } void ImportTmxJob::run() { QTime a; a.start(); QFile file(m_filename); if (!file.open(QFile::ReadOnly | QFile::Text)) return; TmxParser parser(m_dbName); QXmlSimpleReader reader; reader.setContentHandler(&parser); QXmlInputSource xmlInputSource(&file); if (!reader.parse(xmlInputSource)) qCWarning(LOKALIZE_LOG) << "failed to load" << m_filename; //qCWarning(LOKALIZE_LOG) <<"Done scanning "< ExportTmxJob::ExportTmxJob(const QString& filename, const QString& dbName) : QRunnable() , m_filename(filename) , m_time(0) , m_dbName(dbName) { } ExportTmxJob::~ExportTmxJob() { qCDebug(LOKALIZE_LOG) << "ExportTmxJob dtor"; } void ExportTmxJob::run() { QTime a; a.start(); QFile out(m_filename); if (!out.open(QFile::WriteOnly | QFile::Text)) return; QXmlStreamWriter xmlOut(&out); xmlOut.setAutoFormatting(true); xmlOut.writeStartDocument(QStringLiteral("1.0")); xmlOut.writeStartElement(QStringLiteral("tmx")); xmlOut.writeAttribute(QStringLiteral("version"), QStringLiteral("2.0")); xmlOut.writeStartElement(QStringLiteral("header")); xmlOut.writeAttribute(QStringLiteral("creationtool"), QStringLiteral("lokalize")); xmlOut.writeAttribute(QStringLiteral("creationtoolversion"), QStringLiteral(LOKALIZE_VERSION)); xmlOut.writeAttribute(QStringLiteral("segtype"), QStringLiteral("paragraph")); xmlOut.writeAttribute(QStringLiteral("o-encoding"), QStringLiteral("UTF-8")); xmlOut.writeEndElement(); xmlOut.writeStartElement(QStringLiteral("body")); QString dbLangCode = Project::instance()->langCode(); QSqlDatabase db = QSqlDatabase::database(m_dbName); QSqlQuery query1(db); if (Q_UNLIKELY(!query1.exec(U( "SELECT main.id, main.ctxt, main.date, main.bits, " "source_strings.source, source_strings.source_accel, " "target_strings.target, target_strings.target_accel, " "files.path, main.change_date " "FROM main, source_strings, target_strings, files " "WHERE source_strings.id=main.source AND " "target_strings.id=main.target AND " "files.id=main.file")))) qCWarning(LOKALIZE_LOG) << "select error: " << query1.lastError().text(); TMConfig c = getConfig(db); const QString DATE_FORMAT = QStringLiteral("yyyyMMdd"); const QString PROP = QStringLiteral("prop"); const QString TYPE = QStringLiteral("type"); while (query1.next()) { QString source = makeAcceledString(query1.value(4).toString(), c.accel, query1.value(5)); QString target = makeAcceledString(query1.value(6).toString(), c.accel, query1.value(7)); xmlOut.writeStartElement(QStringLiteral("tu")); xmlOut.writeAttribute(QStringLiteral("tuid"), QString::number(query1.value(0).toLongLong())); xmlOut.writeStartElement(QStringLiteral("tuv")); xmlOut.writeAttribute(QStringLiteral("xml:lang"), QStringLiteral("en")); xmlOut.writeStartElement(QStringLiteral("seg")); xmlOut.writeCharacters(source); xmlOut.writeEndElement(); xmlOut.writeEndElement(); xmlOut.writeStartElement(QStringLiteral("tuv")); xmlOut.writeAttribute(QStringLiteral("xml:lang"), dbLangCode); xmlOut.writeAttribute(QStringLiteral("creationdate"), QDate::fromString(query1.value(2).toString(), Qt::ISODate).toString(DATE_FORMAT)); xmlOut.writeAttribute(QStringLiteral("changedate"), QDate::fromString(query1.value(9).toString(), Qt::ISODate).toString(DATE_FORMAT)); QString ctxt = query1.value(1).toString(); if (!ctxt.isEmpty()) { int pos = ctxt.indexOf(TM_DELIMITER); if (pos != -1) { QString plural = ctxt; plural.remove(0, pos + 1); ctxt.remove(pos, plural.size()); xmlOut.writeStartElement(PROP); xmlOut.writeAttribute(TYPE, "x-pluralform"); xmlOut.writeCharacters(plural); xmlOut.writeEndElement(); } if (!ctxt.isEmpty()) { xmlOut.writeStartElement(PROP); xmlOut.writeAttribute(TYPE, "x-context"); xmlOut.writeCharacters(ctxt); xmlOut.writeEndElement(); } } QString filePath = query1.value(8).toString(); if (!filePath.isEmpty()) { xmlOut.writeStartElement(PROP); xmlOut.writeAttribute(TYPE, "x-file"); xmlOut.writeCharacters(filePath); xmlOut.writeEndElement(); } qlonglong bits = query1.value(8).toLongLong(); if (bits & TM_NOTAPPROVED) if (!filePath.isEmpty()) { xmlOut.writeStartElement(PROP); xmlOut.writeAttribute(TYPE, "x-approved"); xmlOut.writeCharacters("no"); xmlOut.writeEndElement(); } xmlOut.writeStartElement(QStringLiteral("seg")); xmlOut.writeCharacters(target); xmlOut.writeEndElement(); xmlOut.writeEndElement(); xmlOut.writeEndElement(); } query1.clear(); xmlOut.writeEndDocument(); out.close(); qCWarning(LOKALIZE_LOG) << "ExportTmxJob done exporting:" << a.elapsed(); m_time = a.elapsed(); } //END TMX ExecQueryJob::ExecQueryJob(const QString& queryString, const QString& dbName) : QObject(), QRunnable() , query(0) , m_dbName(dbName) , m_query(queryString) { setAutoDelete(false); //qCDebug(LOKALIZE_LOG)<<"ExecQueryJob"<lastError().text(); emit done(this); }