diff --git a/src/metadata/filemetadata.cpp b/src/metadata/filemetadata.cpp index 69287fd..1e6e4ca 100644 --- a/src/metadata/filemetadata.cpp +++ b/src/metadata/filemetadata.cpp @@ -1,111 +1,109 @@ /* **************************************************************************** This file is part of Lokalize Copyright (C) 2018 by Karl Ove Hufthammer Copyright (C) 2007-2014 by Nick Shaforostoff Copyright (C) 2009 by Viesturs Zarins Copyright (C) 2018-2019 by Simon Depiets Copyright (C) 2019 by Alexander Potashev This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License or (at your option) version 3 or any later version accepted by the membership of KDE e.V. (or its successor approved by the membership of KDE e.V.), which shall act as a proxy defined in Section 14 of version 3 of the license. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . **************************************************************************** */ #include "filemetadata.h" #include "poextractor.h" #include "xliffextractor.h" FileMetaData::FileMetaData() : invalid_file(false) , translated(0) , translated_reviewer(0) , translated_approver(0) , untranslated(0) , fuzzy(0) , fuzzy_reviewer(0) , fuzzy_approver(0) {} // static FileMetaData FileMetaData::extract(const QString &filePath) { - FileMetaData m; - if (filePath.endsWith(QLatin1String(".po")) || filePath.endsWith(QLatin1String(".pot"))) { POExtractor extractor; - extractor.extract(filePath, m); + return extractor.extract(filePath); } else if (filePath.endsWith(QLatin1String(".xlf")) || filePath.endsWith(QLatin1String(".xliff"))) { XliffExtractor extractor; - extractor.extract(filePath, m); + return extractor.extract(filePath); } else if (filePath.endsWith(QLatin1String(".ts"))) { //POExtractor extractor; //extractor.extract(filePath, m); } - return m; + return {}; } QDataStream &operator<<(QDataStream &s, const FileMetaData &d) { //Magic number s << (quint32)0xABC42BCA; //Version s << (qint32)1; s << d.translated; s << d.translated_approver; s << d.translated_reviewer; s << d.fuzzy; s << d.fuzzy_approver; s << d.fuzzy_reviewer; s << d.untranslated; s << d.lastTranslator; s << d.translationDate; s << d.sourceDate; s << d.invalid_file; return s; } QDataStream &operator>>(QDataStream &s, FileMetaData &d) { //Read the magic number qint32 version = 0; quint32 magic; s >> magic; if (magic == 0xABC42BCA) { //This is a valid magic number, we can expect a version number //Else it's the old format s >> version; s >> d.translated; } else { //Legacy format, the magic number was actually the translated count d.translated = magic; } s >> d.translated_approver; s >> d.translated_reviewer; s >> d.fuzzy; s >> d.fuzzy_approver; s >> d.fuzzy_reviewer; s >> d.untranslated; s >> d.lastTranslator; s >> d.translationDate; s >> d.sourceDate; if (version >= 1) { s >> d.invalid_file; } return s; } diff --git a/src/metadata/poextractor.cpp b/src/metadata/poextractor.cpp index 5fb3bb3..11da64c 100644 --- a/src/metadata/poextractor.cpp +++ b/src/metadata/poextractor.cpp @@ -1,192 +1,194 @@ /* Gettext translation file analyzer Copyright (C) 2007 Montel Laurent Copyright (C) 2009 Jos van den Oever Copyright (C) 2014 Nick Shaforostoff 2018-2019 by Simon Depiets This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ - #include "poextractor.h" #include #include POExtractor::POExtractor() : state(WHITESPACE) , messages(0) , untranslated(0) , fuzzy(0) , isFuzzy(false) , isTranslated(false) { } void POExtractor::endMessage() { messages++; if (isTranslated) fuzzy += isFuzzy; untranslated += (!isTranslated); isFuzzy = false; isTranslated = false; state = WHITESPACE; } void POExtractor::handleComment(const char* data, uint32_t length) { state = COMMENT; if (length >= 8 && strncmp(data, "#, fuzzy", 8) == 0) { // could be better isFuzzy = true; } } void POExtractor::handleLine(const char* data, uint32_t length) { if (state == ERROR) return; if (state == WHITESPACE) { if (length == 0) return; if (data[0] != '#') { state = COMMENT; //this allows PO files w/o comments } else { handleComment(data, length); return; } } if (state == COMMENT) { if (length == 0) { state = WHITESPACE; } else if (data[0] == '#') { handleComment(data, length); } else if (length > 7 && strncmp("msgctxt", data, 7) == 0) { state = MSGCTXT; } else if (length > 7 && strncmp("msgid \"", data, 7) == 0) { state = MSGID; } else { state = ERROR; } return; } else if (length > 1 && data[0] == '"' && data[length - 1] == '"' && (state == MSGCTXT || state == MSGID || state == MSGSTR || state == MSGID_PLURAL)) { // continued text field isTranslated = state == MSGSTR && length > 2; } else if (state == MSGCTXT && length > 7 && strncmp("msgid \"", data, 7) == 0) { state = MSGID; } else if (state == MSGID && length > 14 && strncmp("msgid_plural \"", data, 14) == 0) { state = MSGID_PLURAL; } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR) && length > 8 && strncmp("msgstr", data, 6) == 0) { state = MSGSTR; isTranslated = strncmp(data + length - 3, " \"\"", 3) != 0; } else if (state == MSGSTR) { if (length == 0) { endMessage(); } else if (data[0] == '#' || data[0] == 'm') { //allow PO without empty line between entries endMessage(); state = COMMENT; handleLine(data, length); } else { state = ERROR; } } else { state = ERROR; } #if 0 if (messages > 1 || state != MSGSTR) return; // handle special values in the first messsage // assumption is that value takes up only one line if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) { result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21)); } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) { result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20)); } else if (strncmp("\"Last-Translator: ", data, 18) == 0) { result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19)); } #endif } -void POExtractor::extract(const QString& filePath, FileMetaData& m) +FileMetaData POExtractor::extract(const QString& filePath) { std::ifstream fstream(QFile::encodeName(filePath)); if (!fstream.is_open()) { - return; + return {}; } state = WHITESPACE; messages = 0; untranslated = 0; fuzzy = 0; isFuzzy = false; isTranslated = false; std::string line; int lines = 0; + FileMetaData m; while (std::getline(fstream, line)) { //TODO add a parsed text of translation units //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size()); //result->append(QString::fromUtf8(arr)); handleLine(line.c_str(), line.size()); lines++; if (messages <= 1 && state == MSGSTR) { // handle special values in the first messsage // assumption is that value takes up only one line if (strncmp("\"POT-Creation-Date: ", line.c_str(), 20) == 0) { m.sourceDate = QByteArray(line.c_str() + 20, line.size() - 21 - 2); } else if (strncmp("\"PO-Revision-Date: ", line.c_str(), 19) == 0) { m.translationDate = QByteArray(line.c_str() + 19, line.size() - 20 - 2); } else if (strncmp("\"Last-Translator: ", line.c_str(), 18) == 0) { m.lastTranslator = QString::fromUtf8(QByteArray::fromRawData(line.c_str() + 18, line.size() - 19 - 2)); } fuzzy = 0; } } handleLine("", 0); //for files with non-empty last line messages--;//cause header does not count /* result->add(Property::TranslationUnitsTotal, messages); result->add(Property::TranslationUnitsWithTranslation, messages-untranslated); result->add(Property::TranslationUnitsWithDraftTranslation, fuzzy); result->add(Property::LineCount, lines); */ //TODO WordCount m.fuzzy = fuzzy; m.translated = messages - untranslated - fuzzy; m.untranslated = untranslated; m.filePath = filePath; //File is invalid if (messages < 0 || fuzzy < 0 || untranslated < 0) { m.invalid_file = true; m.translated = 0; m.untranslated = 0; m.fuzzy = 0; } //TODO m.translated_approver = m.translated_reviewer = m.translated; m.fuzzy_approver = m.fuzzy_reviewer = m.fuzzy; + + return m; } diff --git a/src/metadata/poextractor.h b/src/metadata/poextractor.h index f64e266..8f25bdf 100644 --- a/src/metadata/poextractor.h +++ b/src/metadata/poextractor.h @@ -1,54 +1,53 @@ /* Gettext translation file analyzer Copyright (C) 2007 Montel Laurent Copyright (C) 2009 Jos van den Oever Copyright (C) 2014 Nick Shaforostoff 2018-2019 by Simon Depiets This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef POEXTRACTOR_H #define POEXTRACTOR_H #include "filemetadata.h" class POExtractor { public: POExtractor(); - void extract(const QString& filePath, FileMetaData& data); + FileMetaData extract(const QString& filePath); private: void endMessage(); void handleComment(const char* data, uint32_t length); void handleLine(const char* data, uint32_t length); enum PoState {COMMENT, MSGCTXT, MSGID, MSGID_PLURAL, MSGSTR, MSGSTR_PLURAL, WHITESPACE, ERROR }; PoState state; int messages; int untranslated; int fuzzy; bool isFuzzy, isTranslated; }; - #endif // PLAINTEXTEXTRACTOR_H diff --git a/src/metadata/xliffextractor.cpp b/src/metadata/xliffextractor.cpp index a6165eb..1ef4c25 100644 --- a/src/metadata/xliffextractor.cpp +++ b/src/metadata/xliffextractor.cpp @@ -1,180 +1,180 @@ /* XLIFF translation file analyzer Copyright (C) 2011 Albert Astals Cid Copyright (C) 2015 Nick Shaforostoff 2018-2019 by Simon Depiets This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xliffextractor.h" #include #include #include "lokalize_debug.h" #include "catalog/catalog.h" -XliffExtractor::XliffExtractor() -{ -} - class XliffHandler: public QXmlDefaultHandler { public: XliffHandler() : total(0) , untranslated(0) , fuzzy(0) , fuzzy_reviewer(0) , fuzzy_approver(0) , currentEntryFuzzy(false) , currentEntryFuzzy_reviewer(false) , currentEntryFuzzy_approver(false) , charCount(0) {} bool startElement(const QString& namespaceURI, const QString& localName, const QString& qName, const QXmlAttributes& atts) override; bool endElement(const QString& namespaceURI, const QString& localName, const QString& qName) override; bool characters(const QString&) override; //void endAnalysis(bool complete); int total; int untranslated; int fuzzy; int fuzzy_reviewer; int fuzzy_approver; QDate lastDate; QString lastTranslator; QString lastTranslator_fallback; QString lastDateString_fallback; private: bool currentEntryFuzzy; bool currentEntryFuzzy_reviewer; bool currentEntryFuzzy_approver; int charCount; }; extern const QString xliff_states[]; TargetState stringToState(const QString& state); bool XliffHandler::startElement(const QString&, const QString& localName, const QString&, const QXmlAttributes& atts) { //if (fileType == Unknown) // fileType = strcmp(localname, "xliff") ? Other : XLF; if (localName == QLatin1String("source")) total++; else if (localName == QLatin1String("target")) { charCount = 0; currentEntryFuzzy = currentEntryFuzzy_reviewer = currentEntryFuzzy_approver = false; if (atts.value(QLatin1String("approved")) != QLatin1String("yes")) { QString state = atts.value(QLatin1String("state")); if (state.length()) { TargetState tstate = stringToState(state); currentEntryFuzzy = !::isApproved(tstate, ProjectLocal::Translator); currentEntryFuzzy_reviewer = !::isApproved(tstate, ProjectLocal::Reviewer); currentEntryFuzzy_approver = !::isApproved(tstate, ProjectLocal::Approver); } } } else if (localName == QLatin1String("phase")) { QString contactNameString = atts.value(QLatin1String("contact-name")); QString contactEmailString = atts.value(QLatin1String("contact-email")); QString dateString = atts.value(QLatin1String("date")); QString currentLastTranslator; if (contactNameString.length() && contactEmailString.length()) currentLastTranslator = contactNameString % " <" % contactEmailString % ">"; else if (contactNameString.length()) currentLastTranslator = contactNameString; else if (contactEmailString.length()) currentLastTranslator = contactEmailString; if (currentLastTranslator.length()) lastTranslator_fallback = currentLastTranslator; if (dateString.length()) { lastDateString_fallback = dateString; const QDate thisDate = QDate::fromString(dateString, Qt::ISODate); if (lastDate.isNull() || thisDate >= lastDate) { // >= Assuming the last one in the file is the real last one lastDate = thisDate; lastTranslator = currentLastTranslator; } } } return true; } bool XliffHandler::endElement(const QString&, const QString& localName, const QString&) { if (localName == QLatin1String("target")) { if (!charCount) { ++untranslated; } else if (currentEntryFuzzy) { ++fuzzy; ++fuzzy_reviewer; ++fuzzy_approver; } else if (currentEntryFuzzy_reviewer) { ++fuzzy_reviewer; ++fuzzy_approver; } else if (currentEntryFuzzy_approver) { ++fuzzy_approver; } } return true; } bool XliffHandler::characters(const QString& ch) { charCount += ch.length(); return true; } -void XliffExtractor::extract(const QString& filePath, FileMetaData& m) +FileMetaData XliffExtractor::extract(const QString& filePath) { QFile file(filePath); - if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) - return; + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { + return {}; + } QXmlInputSource source(&file); QXmlSimpleReader xmlReader; XliffHandler handler; xmlReader.setContentHandler(&handler); bool ok = xmlReader.parse(source); if (!ok) qCDebug(LOKALIZE_LOG) << "Parsing failed."; //TODO WordCount + FileMetaData m; m.fuzzy = handler.fuzzy; m.translated = handler.total - handler.untranslated - handler.fuzzy; m.untranslated = handler.untranslated; m.filePath = filePath; //qCDebug(LOKALIZE_LOG)<<"parsed"<= 0 && m.untranslated >= 0 && handler.total >= 0); m.translated_approver = handler.total - handler.untranslated - handler.fuzzy_approver; m.translated_reviewer = handler.total - handler.untranslated - handler.fuzzy_reviewer; m.fuzzy_approver = handler.fuzzy_approver; m.fuzzy_reviewer = handler.fuzzy_reviewer; m.lastTranslator = handler.lastTranslator.length() ? handler.lastTranslator : handler.lastTranslator_fallback; m.translationDate = handler.lastDate.isValid() ? handler.lastDate.toString(Qt::ISODate) : handler.lastDateString_fallback; + + return m; } diff --git a/src/metadata/xliffextractor.h b/src/metadata/xliffextractor.h index bf71d6e..7e25fa6 100644 --- a/src/metadata/xliffextractor.h +++ b/src/metadata/xliffextractor.h @@ -1,38 +1,36 @@ /* XLIFF translation file analyzer Copyright (C) 2011 Albert Astals Cid Copyright (C) 2015 Nick Shaforostoff 2018-2019 by Simon Depiets This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ - #ifndef XLIFFEXTRACTOR_H #define XLIFFEXTRACTOR_H #include "filemetadata.h" class XliffExtractor { public: - XliffExtractor(); - void extract(const QString& filePath, FileMetaData& data); + XliffExtractor() = default; + FileMetaData extract(const QString& filePath); }; - #endif // PLAINTEXTEXTRACTOR_H