diff --git a/data/schema/language.xsd b/data/schema/language.xsd index 8c21c35..dc5b081 100644 --- a/data/schema/language.xsd +++ b/data/schema/language.xsd @@ -1,680 +1,680 @@ diff --git a/src/lib/definition.cpp b/src/lib/definition.cpp index b6a9764..67bcd51 100644 --- a/src/lib/definition.cpp +++ b/src/lib/definition.cpp @@ -1,795 +1,797 @@ /* Copyright (C) 2016 Volker Krause Copyright (C) 2018 Dominik Haumann Copyright (C) 2018 Christoph Cullmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "definition.h" #include "definition_p.h" #include "definitionref_p.h" #include "context_p.h" #include "format.h" #include "format_p.h" #include "repository_p.h" #include "rule_p.h" #include "ksyntaxhighlighting_logging.h" #include "ksyntaxhighlighting_version.h" #include "xml_p.h" #include #include #include #include #include #include #include #include #include using namespace KSyntaxHighlighting; DefinitionData::DefinitionData() : wordDelimiters(QStringLiteral("\t !%&()*+,-./:;<=>?[\\]^{|}~")) // must be sorted! , wordWrapDelimiters(wordDelimiters) { } DefinitionData::~DefinitionData() { qDeleteAll(contexts); } DefinitionData* DefinitionData::get(const Definition &def) { return def.d.get(); } Definition::Definition() : d(new DefinitionData) { } Definition::Definition(const Definition &other) : d(other.d) { d->q = *this; } Definition::Definition(const std::shared_ptr &dd) : d(dd) { } Definition::~Definition() { } Definition& Definition::operator=(const Definition &rhs) { d = rhs.d; return *this; } bool Definition::operator==(const Definition &other) const { return d->fileName == other.d->fileName; } bool Definition::operator!=(const Definition& other) const { return d->fileName != other.d->fileName; } bool Definition::isValid() const { return d->repo && !d->fileName.isEmpty() && !d->name.isEmpty(); } QString Definition::filePath() const { return d->fileName; } QString Definition::name() const { return d->name; } QString Definition::translatedName() const { return QCoreApplication::instance()->translate("Language", d->name.toUtf8().constData()); } QString Definition::section() const { return d->section; } QString Definition::translatedSection() const { return QCoreApplication::instance()->translate("Language Section", d->section.toUtf8().constData()); } QVector Definition::mimeTypes() const { return d->mimetypes; } QVector Definition::extensions() const { return d->extensions; } int Definition::version() const { return d->version; } int Definition::priority() const { return d->priority; } bool Definition::isHidden() const { return d->hidden; } QString Definition::style() const { return d->style; } QString Definition::indenter() const { return d->indenter; } QString Definition::author() const { return d->author; } QString Definition::license() const { return d->license; } bool Definition::isWordDelimiter(QChar c) const { d->load(); return d->isWordDelimiter(c); } bool Definition::isWordWrapDelimiter(QChar c) const { d->load(); return std::binary_search(d->wordWrapDelimiters.constBegin(), d->wordWrapDelimiters.constEnd(), c); } bool Definition::foldingEnabled() const { d->load(); if (d->hasFoldingRegions || indentationBasedFoldingEnabled()) { return true; } // check included definitions for (const auto &def : includedDefinitions()) { if (def.foldingEnabled()) { d->hasFoldingRegions = true; break; } } return d->hasFoldingRegions; } bool Definition::indentationBasedFoldingEnabled() const { d->load(); return d->indentationBasedFolding; } QStringList Definition::foldingIgnoreList() const { d->load(); return d->foldingIgnoreList; } QStringList Definition::keywordLists() const { d->load(); return d->keywordLists.keys(); } QStringList Definition::keywordList(const QString& name) const { d->load(); - return d->keywordList(name).keywords(); + const auto list = d->keywordList(name); + return list ? list->keywords() : QStringList(); } QVector Definition::formats() const { d->load(); // sort formats so that the order matches the order of the itemDatas in the xml files. auto formatList = QVector::fromList(d->formats.values()); std::sort(formatList.begin(), formatList.end(), [](const KSyntaxHighlighting::Format & lhs, const KSyntaxHighlighting::Format & rhs){ return lhs.id() < rhs.id(); }); return formatList; } QVector Definition::includedDefinitions() const { d->load(); // init worklist and result used as guard with this definition QVector queue{*this}; QVector definitions{*this}; while (!queue.isEmpty()) { // Iterate all context rules to find associated Definitions. This will // automatically catch other Definitions referenced with IncludeRuldes or ContextSwitch. const auto definition = queue.takeLast(); for (const auto & context : definition.d->contexts) { // handle context switch attributes of this context itself for (const auto switchContext : {context->lineEndContext().context(), context->lineEmptyContext().context(), context->fallthroughContext().context()}) { if (switchContext) { if (!definitions.contains(switchContext->definition())) { queue.push_back(switchContext->definition()); definitions.push_back(switchContext->definition()); } } } // handle the embedded rules for (const auto &rule : context->rules()) { // handle include rules like inclusion if (!definitions.contains(rule->definition())) { queue.push_back(rule->definition()); definitions.push_back(rule->definition()); } // handle context switch context inclusion if (auto switchContext = rule->context().context()) { if (!definitions.contains(switchContext->definition())) { queue.push_back(switchContext->definition()); definitions.push_back(switchContext->definition()); } } } } } // remove the 1st entry, since it is this Definition definitions.pop_front(); return definitions; } QString Definition::singleLineCommentMarker() const { d->load(); return d->singleLineCommentMarker; } CommentPosition Definition::singleLineCommentPosition() const { d->load(); return d->singleLineCommentPosition; } QPair Definition::multiLineCommentMarker() const { d->load(); return { d->multiLineCommentStartMarker, d->multiLineCommentEndMarker }; } QVector> Definition::characterEncodings() const { d->load(); return d->characterEncodings; } Context* DefinitionData::initialContext() const { Q_ASSERT(!contexts.isEmpty()); return contexts.first(); } Context* DefinitionData::contextByName(const QString& name) const { foreach (auto context, contexts) { if (context->name() == name) return context; } return nullptr; } -KeywordList DefinitionData::keywordList(const QString& name) const +KeywordList *DefinitionData::keywordList(const QString& name) { - return keywordLists.value(name); + auto it = keywordLists.find(name); + return (it == keywordLists.end()) ? nullptr : &it.value(); } bool DefinitionData::isWordDelimiter(QChar c) const { return std::binary_search(wordDelimiters.constBegin(), wordDelimiters.constEnd(), c); } Format DefinitionData::formatByName(const QString& name) const { const auto it = formats.constFind(name); if (it != formats.constEnd()) return it.value(); return Format(); } bool DefinitionData::isLoaded() const { return !contexts.isEmpty(); } bool DefinitionData::load() { if (fileName.isEmpty()) return false; if (isLoaded()) return true; QFile file(fileName); if (!file.open(QFile::ReadOnly)) return false; QXmlStreamReader reader(&file); while (!reader.atEnd()) { const auto token = reader.readNext(); if (token != QXmlStreamReader::StartElement) continue; if (reader.name() == QLatin1String("highlighting")) loadHighlighting(reader); else if (reader.name() == QLatin1String("general")) loadGeneral(reader); } for (auto it = keywordLists.begin(); it != keywordLists.end(); ++it) (*it).setCaseSensitivity(caseSensitive); foreach (auto context, contexts) { context->resolveContexts(); context->resolveIncludes(); context->resolveAttributeFormat(); } Q_ASSERT(std::is_sorted(wordDelimiters.constBegin(), wordDelimiters.constEnd())); return true; } void DefinitionData::clear() { // keep only name and repo, so we can re-lookup to make references persist over repo reloads keywordLists.clear(); qDeleteAll(contexts); contexts.clear(); formats.clear(); fileName.clear(); section.clear(); style.clear(); indenter.clear(); author.clear(); license.clear(); mimetypes.clear(); extensions.clear(); wordDelimiters = QStringLiteral("\t !%&()*+,-./:;<=>?[\\]^{|}~"); // must be sorted! wordWrapDelimiters = wordDelimiters; caseSensitive = Qt::CaseSensitive; version = 0.0f; priority = 0; hidden = false; } bool DefinitionData::loadMetaData(const QString& definitionFileName) { fileName = definitionFileName; QFile file(definitionFileName); if (!file.open(QFile::ReadOnly)) return false; QXmlStreamReader reader(&file); while (!reader.atEnd()) { const auto token = reader.readNext(); if (token != QXmlStreamReader::StartElement) continue; if (reader.name() == QLatin1String("language")) { return loadLanguage(reader); } } return false; } bool DefinitionData::loadMetaData(const QString &file, const QJsonObject &obj) { name = obj.value(QLatin1String("name")).toString(); section = obj.value(QLatin1String("section")).toString(); version = obj.value(QLatin1String("version")).toInt(); priority = obj.value(QLatin1String("priority")).toInt(); style = obj.value(QLatin1String("style")).toString(); author = obj.value(QLatin1String("author")).toString(); license = obj.value(QLatin1String("license")).toString(); indenter = obj.value(QLatin1String("indenter")).toString(); hidden = obj.value(QLatin1String("hidden")).toBool(); fileName = file; const auto exts = obj.value(QLatin1String("extensions")).toString(); foreach (const auto &ext, exts.split(QLatin1Char(';'), QString::SkipEmptyParts)) extensions.push_back(ext); const auto mts = obj.value(QLatin1String("mimetype")).toString(); foreach (const auto &mt, mts.split(QLatin1Char(';'), QString::SkipEmptyParts)) mimetypes.push_back(mt); return true; } bool DefinitionData::loadLanguage(QXmlStreamReader &reader) { Q_ASSERT(reader.name() == QLatin1String("language")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); if (!checkKateVersion(reader.attributes().value(QStringLiteral("kateversion")))) return false; name = reader.attributes().value(QStringLiteral("name")).toString(); section = reader.attributes().value(QStringLiteral("section")).toString(); // toFloat instead of toInt for backward compatibility with old Kate files version = reader.attributes().value(QStringLiteral("version")).toFloat(); priority = reader.attributes().value(QStringLiteral("priority")).toInt(); hidden = Xml::attrToBool(reader.attributes().value(QStringLiteral("hidden"))); style = reader.attributes().value(QStringLiteral("style")).toString(); indenter = reader.attributes().value(QStringLiteral("indenter")).toString(); author = reader.attributes().value(QStringLiteral("author")).toString(); license = reader.attributes().value(QStringLiteral("license")).toString(); const auto exts = reader.attributes().value(QStringLiteral("extensions")).toString(); foreach (const auto &ext, exts.split(QLatin1Char(';'), QString::SkipEmptyParts)) extensions.push_back(ext); const auto mts = reader.attributes().value(QStringLiteral("mimetype")).toString(); foreach (const auto &mt, mts.split(QLatin1Char(';'), QString::SkipEmptyParts)) mimetypes.push_back(mt); if (reader.attributes().hasAttribute(QStringLiteral("casesensitive"))) caseSensitive = Xml::attrToBool(reader.attributes().value(QStringLiteral("casesensitive"))) ? Qt::CaseSensitive : Qt::CaseInsensitive; return true; } void DefinitionData::loadHighlighting(QXmlStreamReader& reader) { Q_ASSERT(reader.name() == QLatin1String("highlighting")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: if (reader.name() == QLatin1String("list")) { KeywordList keywords; keywords.load(reader); keywordLists.insert(keywords.name(), keywords); } else if (reader.name() == QLatin1String("contexts")) { loadContexts(reader); reader.readNext(); } else if (reader.name() == QLatin1String("itemDatas")) { loadItemData(reader); } else { reader.readNext(); } break; case QXmlStreamReader::EndElement: return; default: reader.readNext(); break; } } } void DefinitionData::loadContexts(QXmlStreamReader& reader) { Q_ASSERT(reader.name() == QLatin1String("contexts")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: if (reader.name() == QLatin1String("context")) { auto context = new Context; context->setDefinition(q); context->load(reader); contexts.push_back(context); } reader.readNext(); break; case QXmlStreamReader::EndElement: return; default: reader.readNext(); break; } } } void DefinitionData::loadItemData(QXmlStreamReader& reader) { Q_ASSERT(reader.name() == QLatin1String("itemDatas")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: if (reader.name() == QLatin1String("itemData")) { Format f; auto formatData = FormatPrivate::detachAndGet(f); formatData->definition = q; formatData->load(reader); formatData->id = RepositoryPrivate::get(repo)->nextFormatId(); formats.insert(f.name(), f); reader.readNext(); } reader.readNext(); break; case QXmlStreamReader::EndElement: return; default: reader.readNext(); break; } } } void DefinitionData::loadGeneral(QXmlStreamReader& reader) { Q_ASSERT(reader.name() == QLatin1String("general")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); reader.readNext(); // reference counter to count XML child elements, to not return too early int elementRefCounter = 1; while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: ++elementRefCounter; if (reader.name() == QLatin1String("keywords")) { if (reader.attributes().hasAttribute(QStringLiteral("casesensitive"))) caseSensitive = Xml::attrToBool(reader.attributes().value(QStringLiteral("casesensitive"))) ? Qt::CaseSensitive : Qt::CaseInsensitive; // adapt sorted wordDelimiters wordDelimiters += reader.attributes().value(QStringLiteral("additionalDeliminator")); std::sort(wordDelimiters.begin(), wordDelimiters.end()); auto it = std::unique(wordDelimiters.begin(), wordDelimiters.end()); wordDelimiters.truncate(std::distance(wordDelimiters.begin(), it)); foreach (const auto c, reader.attributes().value(QLatin1String("weakDeliminator"))) wordDelimiters.remove(c); // adaptWordWrapDelimiters, and sort wordWrapDelimiters = reader.attributes().value(QStringLiteral("wordWrapDeliminator")).toString(); std::sort(wordWrapDelimiters.begin(), wordWrapDelimiters.end()); if (wordWrapDelimiters.isEmpty()) wordWrapDelimiters = wordDelimiters; } else if (reader.name() == QLatin1String("folding")) { if (reader.attributes().hasAttribute(QStringLiteral("indentationsensitive"))) indentationBasedFolding = Xml::attrToBool(reader.attributes().value(QStringLiteral("indentationsensitive"))); } else if (reader.name() == QLatin1String("emptyLines")) { loadFoldingIgnoreList(reader); } else if (reader.name() == QLatin1String("comments")) { loadComments(reader); } else if (reader.name() == QLatin1String("spellchecking")) { loadSpellchecking(reader); } else { reader.skipCurrentElement(); } reader.readNext(); break; case QXmlStreamReader::EndElement: --elementRefCounter; if (elementRefCounter == 0) return; reader.readNext(); break; default: reader.readNext(); break; } } } void DefinitionData::loadComments(QXmlStreamReader &reader) { Q_ASSERT(reader.name() == QLatin1String("comments")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); reader.readNext(); // reference counter to count XML child elements, to not return too early int elementRefCounter = 1; while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: ++elementRefCounter; if (reader.name() == QLatin1String("comment")) { const bool isSingleLine = reader.attributes().value(QStringLiteral("name")) == QStringLiteral("singleLine"); if (isSingleLine) { singleLineCommentMarker = reader.attributes().value(QStringLiteral("start")).toString(); const bool afterWhiteSpace = reader.attributes().value(QStringLiteral("position")).toString() == QStringLiteral("afterwhitespace"); singleLineCommentPosition = afterWhiteSpace ? CommentPosition::AfterWhitespace : CommentPosition::StartOfLine; } else { multiLineCommentStartMarker = reader.attributes().value(QStringLiteral("start")).toString(); multiLineCommentEndMarker = reader.attributes().value(QStringLiteral("end")).toString(); } } reader.readNext(); break; case QXmlStreamReader::EndElement: --elementRefCounter; if (elementRefCounter == 0) return; reader.readNext(); break; default: reader.readNext(); break; } } } void DefinitionData::loadFoldingIgnoreList(QXmlStreamReader& reader) { Q_ASSERT(reader.name() == QLatin1String("emptyLines")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); reader.readNext(); // reference counter to count XML child elements, to not return too early int elementRefCounter = 1; while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: ++elementRefCounter; if (reader.name() == QLatin1String("emptyLine")) { foldingIgnoreList << reader.attributes().value(QStringLiteral("regexpr")).toString(); } reader.readNext(); break; case QXmlStreamReader::EndElement: --elementRefCounter; if (elementRefCounter == 0) return; reader.readNext(); break; default: reader.readNext(); break; } } } void DefinitionData::loadSpellchecking(QXmlStreamReader &reader) { Q_ASSERT(reader.name() == QLatin1String("spellchecking")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); reader.readNext(); // reference counter to count XML child elements, to not return too early int elementRefCounter = 1; while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: ++elementRefCounter; if (reader.name() == QLatin1String("encoding")) { const auto charRef = reader.attributes().value(QStringLiteral("char")); if (!charRef.isEmpty()) { const auto str = reader.attributes().value(QStringLiteral("string")).toString(); characterEncodings.push_back({ charRef[0], str }); } } reader.readNext(); break; case QXmlStreamReader::EndElement: --elementRefCounter; if (elementRefCounter == 0) return; reader.readNext(); break; default: reader.readNext(); break; } } } bool DefinitionData::checkKateVersion(const QStringRef& verStr) { const auto idx = verStr.indexOf(QLatin1Char('.')); if (idx <= 0) { qCWarning(Log) << "Skipping" << fileName << "due to having no valid kateversion attribute:" << verStr; return false; } const auto major = verStr.left(idx).toInt(); const auto minor = verStr.mid(idx + 1).toInt(); if (major > SyntaxHighlighting_VERSION_MAJOR || (major == SyntaxHighlighting_VERSION_MAJOR && minor > SyntaxHighlighting_VERSION_MINOR)) { qCWarning(Log) << "Skipping" << fileName << "due to being too new, version:" << verStr; return false; } return true; } quint16 DefinitionData::foldingRegionId(const QString &foldName) { hasFoldingRegions = true; return RepositoryPrivate::get(repo)->foldingRegionId(name, foldName); } DefinitionRef::DefinitionRef() { } DefinitionRef::DefinitionRef(const Definition &def) : d(def.d) { } DefinitionRef::~DefinitionRef() { } DefinitionRef& DefinitionRef::operator=(const Definition &def) { d = def.d; return *this; } Definition DefinitionRef::definition() const { if (!d.expired()) return Definition(d.lock()); return Definition(); } bool DefinitionRef::operator==(const DefinitionRef &other) const { if (d.expired() != other.d.expired()) { return false; } return d.expired() || d.lock().get() == other.d.lock().get(); } diff --git a/src/lib/definition_p.h b/src/lib/definition_p.h index 3c59099..ab95a95 100644 --- a/src/lib/definition_p.h +++ b/src/lib/definition_p.h @@ -1,111 +1,111 @@ /* Copyright (C) 2016 Volker Krause Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef KSYNTAXHIGHLIGHTING_DEFINITION_P_H #define KSYNTAXHIGHLIGHTING_DEFINITION_P_H #include "definitionref_p.h" #include "definition.h" #include #include #include QT_BEGIN_NAMESPACE class QXmlStreamReader; class QJsonObject; QT_END_NAMESPACE namespace KSyntaxHighlighting { class Repository; class DefinitionData { public: DefinitionData(); ~DefinitionData(); static DefinitionData* get(const Definition &def); bool isLoaded() const; bool loadMetaData(const QString &definitionFileName); bool loadMetaData(const QString &fileName, const QJsonObject &obj); void clear(); bool load(); bool loadLanguage(QXmlStreamReader &reader); void loadHighlighting(QXmlStreamReader &reader); void loadContexts(QXmlStreamReader &reader); void loadItemData(QXmlStreamReader &reader); void loadGeneral(QXmlStreamReader &reader); void loadComments(QXmlStreamReader &reader); void loadFoldingIgnoreList(QXmlStreamReader &reader); void loadSpellchecking(QXmlStreamReader &reader); bool checkKateVersion(const QStringRef &verStr); - KeywordList keywordList(const QString &name) const; + KeywordList *keywordList(const QString &name); bool isWordDelimiter(QChar c) const; Context* initialContext() const; Context* contextByName(const QString &name) const; Format formatByName(const QString &name) const; quint16 foldingRegionId(const QString &foldName); DefinitionRef q; Repository *repo = nullptr; QHash keywordLists; QVector contexts; QHash formats; QString wordDelimiters; QString wordWrapDelimiters; bool hasFoldingRegions = false; bool indentationBasedFolding = false; QStringList foldingIgnoreList; QString singleLineCommentMarker; CommentPosition singleLineCommentPosition = CommentPosition::StartOfLine; QString multiLineCommentStartMarker; QString multiLineCommentEndMarker; QVector> characterEncodings; QString fileName; QString name = QStringLiteral(QT_TRANSLATE_NOOP("Syntax highlighting", "None")); QString section; QString style; QString indenter; QString author; QString license; QVector mimetypes; QVector extensions; Qt::CaseSensitivity caseSensitive = Qt::CaseSensitive; int version = 0; int priority = 0; bool hidden = false; }; } #endif diff --git a/src/lib/keywordlist.cpp b/src/lib/keywordlist.cpp index 2624ae1..fe5f775 100644 --- a/src/lib/keywordlist.cpp +++ b/src/lib/keywordlist.cpp @@ -1,94 +1,104 @@ /* Copyright (C) 2016 Volker Krause Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "keywordlist_p.h" #include #include -using namespace KSyntaxHighlighting; - -bool KeywordList::isEmpty() const -{ - return m_keywords.isEmpty(); -} +#include -QString KeywordList::name() const -{ - return m_name; -} - -QStringList KeywordList::keywords() const -{ - return m_keywords.values(); -} - -bool KeywordList::contains(const QStringRef &str) const -{ - return contains(str, m_caseSensitive); -} +using namespace KSyntaxHighlighting; -bool KeywordList::contains(const QStringRef &str, Qt::CaseSensitivity caseSensitivityOverride) const +bool KeywordList::contains(const QStringRef &str, Qt::CaseSensitivity caseSensitive) const { - if (Q_UNLIKELY(caseSensitivityOverride == Qt::CaseInsensitive && m_lowerCaseKeywords.isEmpty())) { - foreach (const auto &kw, m_keywords) - m_lowerCaseKeywords.insert(kw.toLower()); - } - - // TODO avoid the copy in toString! - if (caseSensitivityOverride == Qt::CaseSensitive) - return m_keywords.contains(str.toString()); - return m_lowerCaseKeywords.contains(str.toString().toLower()); + /** + * get right vector to search in + */ + const auto &vectorToSearch = (caseSensitive == Qt::CaseSensitive) ? m_keywordsSortedCaseSensitive : m_keywordsSortedCaseInsensitive; + + /** + * search with right predicate + */ + return std::binary_search(vectorToSearch.begin(), vectorToSearch.end(), str, [caseSensitive] (const QStringRef &a, const QStringRef &b) { return a.compare(b, caseSensitive) < 0; }); } void KeywordList::load(QXmlStreamReader& reader) { Q_ASSERT(reader.name() == QLatin1String("list")); Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); m_name = reader.attributes().value(QStringLiteral("name")).toString(); while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: if (reader.name() == QLatin1String("item")) { - m_keywords.insert(reader.readElementText().trimmed()); + m_keywords.append(reader.readElementText().trimmed()); reader.readNextStartElement(); break; } reader.readNext(); break; case QXmlStreamReader::EndElement: reader.readNext(); return; default: reader.readNext(); break; } } } void KeywordList::setCaseSensitivity(Qt::CaseSensitivity caseSensitive) { + /** + * remember default case-sensitivity and init lookup for it + */ m_caseSensitive = caseSensitive; + initLookupForCaseSensitivity(m_caseSensitive); +} + +void KeywordList::initLookupForCaseSensitivity(Qt::CaseSensitivity caseSensitive) +{ + /** + * get right vector to sort, if non-empty, we are done + */ + auto &vectorToSort = (caseSensitive == Qt::CaseSensitive) ? m_keywordsSortedCaseSensitive : m_keywordsSortedCaseInsensitive; + if (!vectorToSort.empty()) { + return; + } + + /** + * fill vector with refs to keywords + */ + vectorToSort.reserve(m_keywords.size()); + for (const auto &keyword : qAsConst(m_keywords)) { + vectorToSort.push_back(&keyword); + } + + /** + * sort with right predicate + */ + std::sort(vectorToSort.begin(), vectorToSort.end(), [caseSensitive] (const QStringRef &a, const QStringRef &b) { return a.compare(b, caseSensitive) < 0; }); } diff --git a/src/lib/keywordlist_p.h b/src/lib/keywordlist_p.h index 4810307..fa70f11 100644 --- a/src/lib/keywordlist_p.h +++ b/src/lib/keywordlist_p.h @@ -1,63 +1,99 @@ /* Copyright (C) 2016 Volker Krause Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef KSYNTAXHIGHLIGHTING_KEYWORDLIST_P_H #define KSYNTAXHIGHLIGHTING_KEYWORDLIST_P_H #include #include #include +#include + class QXmlStreamReader; namespace KSyntaxHighlighting { class KeywordList { public: KeywordList() = default; ~KeywordList() = default; - bool isEmpty() const; + bool isEmpty() const + { + return m_keywords.isEmpty(); + } - QString name() const; + const QString &name() const + { + return m_name; + } - QStringList keywords() const; + const QStringList &keywords() const + { + return m_keywords; + } /** Checks if @p str is a keyword in this list. */ - bool contains(const QStringRef &str) const; + bool contains(const QStringRef &str) const + { + return contains(str, m_caseSensitive); + } + /** Checks if @p str is a keyword in this list, overriding the global case-sensitivity setting. */ - bool contains(const QStringRef &str, Qt::CaseSensitivity caseSensitivityOverride) const; + bool contains(const QStringRef &str, Qt::CaseSensitivity caseSensitive) const; void load(QXmlStreamReader &reader); void setCaseSensitivity(Qt::CaseSensitivity caseSensitive); + void initLookupForCaseSensitivity(Qt::CaseSensitivity caseSensitive); private: + /** + * name of keyword list as in XML + */ QString m_name; - QSet m_keywords; - mutable QSet m_lowerCaseKeywords; + + /** + * raw list of keywords, as seen in XML (but trimmed) + */ + QStringList m_keywords; + + /** + * default case-sensitivity setting + */ Qt::CaseSensitivity m_caseSensitive = Qt::CaseSensitive; + + /** + * case-sensitive sorted string references to m_keywords for lookup + */ + std::vector m_keywordsSortedCaseSensitive; + + /** + * case-insensitive sorted string references to m_keywords for lookup + */ + std::vector m_keywordsSortedCaseInsensitive; }; } #endif // KSYNTAXHIGHLIGHTING_KEYWORDLIST_P_H diff --git a/src/lib/rule.cpp b/src/lib/rule.cpp index 34218a8..2cd6e92 100644 --- a/src/lib/rule.cpp +++ b/src/lib/rule.cpp @@ -1,711 +1,738 @@ /* Copyright (C) 2016 Volker Krause Copyright (C) 2018 Christoph Cullmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "context_p.h" #include "rule_p.h" #include "definition_p.h" #include "ksyntaxhighlighting_logging.h" #include "xml_p.h" #include #include using namespace KSyntaxHighlighting; static bool isOctalChar(QChar c) { return c.isNumber() && c != QLatin1Char('9') && c != QLatin1Char('8'); } static bool isHexChar(QChar c) { return c.isNumber() || c == QLatin1Char('a') || c == QLatin1Char('A') || c == QLatin1Char('b') || c == QLatin1Char('B') || c == QLatin1Char('c') || c == QLatin1Char('C') || c == QLatin1Char('d') || c == QLatin1Char('D') || c == QLatin1Char('e') || c == QLatin1Char('E') || c == QLatin1Char('f') || c == QLatin1Char('F'); } static int matchEscapedChar(const QString &text, int offset) { if (text.at(offset) != QLatin1Char('\\') || text.size() < offset + 2) return offset; const auto c = text.at(offset + 1); static const auto controlChars = QStringLiteral("abefnrtv\"'?\\"); if (controlChars.contains(c)) return offset + 2; if (c == QLatin1Char('x')) { // hex encoded character auto newOffset = offset + 2; for (int i = 0; i < 2 && newOffset + i < text.size(); ++i, ++newOffset) { if (!isHexChar(text.at(newOffset))) break; } if (newOffset == offset + 2) return offset; return newOffset; } if (isOctalChar(c)) { // octal encoding auto newOffset = offset + 2; for (int i = 0; i < 2 && newOffset + i < text.size(); ++i, ++newOffset) { if (!isOctalChar(text.at(newOffset))) break; } if (newOffset == offset + 2) return offset; return newOffset; } return offset; } static QString replaceCaptures(const QString &pattern, const QStringList &captures, bool quote) { auto result = pattern; for (int i = captures.size() - 1; i >= 1; --i) { result.replace(QLatin1Char('%') + QString::number(i), quote ? QRegularExpression::escape(captures.at(i)) : captures.at(i)); } return result; } Definition Rule::definition() const { return m_def.definition(); } void Rule::setDefinition(const Definition &def) { m_def = def; // cache for DefinitionData::wordDelimiters, is accessed VERY often m_wordDelimiter = &DefinitionData::get(m_def.definition())->wordDelimiters; } ContextSwitch Rule::context() const { return m_context; } bool Rule::isLookAhead() const { return m_lookAhead; } bool Rule::firstNonSpace() const { return m_firstNonSpace; } int Rule::requiredColumn() const { return m_column; } FoldingRegion Rule::beginRegion() const { return m_beginRegion; } FoldingRegion Rule::endRegion() const { return m_endRegion; } bool Rule::load(QXmlStreamReader &reader) { Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); m_attribute = reader.attributes().value(QStringLiteral("attribute")).toString(); if (reader.name() != QLatin1String("IncludeRules")) // IncludeRules uses this with a different semantic m_context.parse(reader.attributes().value(QStringLiteral("context"))); m_firstNonSpace = Xml::attrToBool(reader.attributes().value(QStringLiteral("firstNonSpace"))); m_lookAhead = Xml::attrToBool(reader.attributes().value(QStringLiteral("lookAhead"))); bool colOk = false; m_column = reader.attributes().value(QStringLiteral("column")).toInt(&colOk); if (!colOk) m_column = -1; auto regionName = reader.attributes().value(QLatin1String("beginRegion")); if (!regionName.isEmpty()) m_beginRegion = FoldingRegion(FoldingRegion::Begin, DefinitionData::get(m_def.definition())->foldingRegionId(regionName.toString())); regionName = reader.attributes().value(QLatin1String("endRegion")); if (!regionName.isEmpty()) m_endRegion = FoldingRegion(FoldingRegion::End, DefinitionData::get(m_def.definition())->foldingRegionId(regionName.toString())); auto result = doLoad(reader); if (m_lookAhead && m_context.isStay()) result = false; reader.readNext(); while (!reader.atEnd()) { switch (reader.tokenType()) { case QXmlStreamReader::StartElement: { auto rule = Rule::create(reader.name()); if (rule) { rule->setDefinition(m_def.definition()); if (rule->load(reader)) { m_subRules.push_back(rule); reader.readNext(); } } else { reader.skipCurrentElement(); } break; } case QXmlStreamReader::EndElement: return result; default: reader.readNext(); break; } } return result; } void Rule::resolveContext() { m_context.resolve(m_def.definition()); foreach (const auto &rule, m_subRules) rule->resolveContext(); } void Rule::resolveAttributeFormat(Context *lookupContext) { /** * try to get our format from the definition we stem from */ if (!m_attribute.isEmpty()) { m_attributeFormat = DefinitionData::get(definition())->formatByName(m_attribute); if (!m_attributeFormat.isValid()) { qCWarning(Log) << "Rule: Unknown format" << m_attribute << "in context" << lookupContext->name() << "of definition" << definition().name(); } } /** * lookup formats for our sub-rules */ foreach (const auto &rule, m_subRules) { rule->resolveAttributeFormat(lookupContext); } } bool Rule::doLoad(QXmlStreamReader& reader) { Q_UNUSED(reader); return true; } MatchResult Rule::match(const QString &text, int offset, const QStringList &captures) const { Q_ASSERT(!text.isEmpty()); const auto result = doMatch(text, offset, captures); if (result.offset() == offset || result.offset() == text.size()) return result; foreach (const auto &subRule, m_subRules) { const auto subResult = subRule->match(text, result.offset(), QStringList()); if (subResult.offset() > result.offset()) return MatchResult(subResult.offset(), result.captures()); } return result; } Rule::Ptr Rule::create(const QStringRef& name) { Rule *rule = nullptr; if (name == QLatin1String("AnyChar")) rule = new AnyChar; else if (name == QLatin1String("DetectChar")) rule = new DetectChar; else if (name == QLatin1String("Detect2Chars")) rule = new Detect2Char; else if (name == QLatin1String("DetectIdentifier")) rule = new DetectIdentifier; else if (name == QLatin1String("DetectSpaces")) rule = new DetectSpaces; else if (name == QLatin1String("Float")) rule = new Float; else if (name == QLatin1String("Int")) rule = new Int; else if (name == QLatin1String("HlCChar")) rule = new HlCChar; else if (name == QLatin1String("HlCHex")) rule = new HlCHex; else if (name == QLatin1String("HlCOct")) rule = new HlCOct; else if (name == QLatin1String("HlCStringChar")) rule = new HlCStringChar; else if (name == QLatin1String("IncludeRules")) rule = new IncludeRules; else if (name == QLatin1String("keyword")) rule = new KeywordListRule; else if (name == QLatin1String("LineContinue")) rule = new LineContinue; else if (name == QLatin1String("RangeDetect")) rule = new RangeDetect; else if (name == QLatin1String("RegExpr")) rule = new RegExpr; else if (name == QLatin1String("StringDetect")) rule = new StringDetect; else if (name == QLatin1String("WordDetect")) rule = new WordDetect; else qCWarning(Log) << "Unknown rule type:" << name; return Ptr(rule); } bool Rule::isWordDelimiter(QChar c) const { // perf tells contains is MUCH faster than binary search here, very short array return m_wordDelimiter.contains(c); } bool AnyChar::doLoad(QXmlStreamReader& reader) { m_chars = reader.attributes().value(QStringLiteral("String")).toString(); if (m_chars.size() == 1) qCDebug(Log) << "AnyChar rule with just one char: use DetectChar instead."; return !m_chars.isEmpty(); } MatchResult AnyChar::doMatch(const QString& text, int offset, const QStringList&) const { if (m_chars.contains(text.at(offset))) return offset + 1; return offset; } bool DetectChar::doLoad(QXmlStreamReader& reader) { const auto s = reader.attributes().value(QStringLiteral("char")); if (s.isEmpty()) return false; m_char = s.at(0); m_dynamic = Xml::attrToBool(reader.attributes().value(QStringLiteral("dynamic"))); if (m_dynamic) { m_captureIndex = m_char.digitValue(); } return true; } MatchResult DetectChar::doMatch(const QString& text, int offset, const QStringList &captures) const { if (m_dynamic) { - if (captures.size() <= m_captureIndex || captures.at(m_captureIndex).isEmpty()) + if (m_captureIndex == 0 || captures.size() <= m_captureIndex || captures.at(m_captureIndex).isEmpty()) return offset; if (text.at(offset) == captures.at(m_captureIndex).at(0)) return offset + 1; return offset; } if (text.at(offset) == m_char) return offset + 1; return offset; } bool Detect2Char::doLoad(QXmlStreamReader& reader) { const auto s1 = reader.attributes().value(QStringLiteral("char")); const auto s2 = reader.attributes().value(QStringLiteral("char1")); if (s1.isEmpty() || s2.isEmpty()) return false; m_char1 = s1.at(0); m_char2 = s2.at(0); return true; } MatchResult Detect2Char::doMatch(const QString& text, int offset, const QStringList &) const { if (text.size() - offset < 2) return offset; if (text.at(offset) == m_char1 && text.at(offset + 1) == m_char2) return offset + 2; return offset; } MatchResult DetectIdentifier::doMatch(const QString& text, int offset, const QStringList&) const { if (!text.at(offset).isLetter() && text.at(offset) != QLatin1Char('_')) return offset; for (int i = offset + 1; i < text.size(); ++i) { const auto c = text.at(i); if (!c.isLetterOrNumber() && c != QLatin1Char('_')) return i; } return text.size(); } MatchResult DetectSpaces::doMatch(const QString& text, int offset, const QStringList&) const { while(offset < text.size() && text.at(offset).isSpace()) ++offset; return offset; } MatchResult Float::doMatch(const QString& text, int offset, const QStringList&) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; auto newOffset = offset; while (newOffset < text.size() && text.at(newOffset).isDigit()) ++newOffset; if (newOffset >= text.size() || text.at(newOffset) != QLatin1Char('.')) return offset; ++newOffset; while (newOffset < text.size() && text.at(newOffset).isDigit()) ++newOffset; if (newOffset == offset + 1) // we only found a decimal point return offset; auto expOffset = newOffset; if (expOffset >= text.size() || (text.at(expOffset) != QLatin1Char('e') && text.at(expOffset) != QLatin1Char('E'))) return newOffset; ++expOffset; if (expOffset < text.size() && (text.at(expOffset) == QLatin1Char('+') || text.at(expOffset) == QLatin1Char('-'))) ++expOffset; bool foundExpDigit = false; while (expOffset < text.size() && text.at(expOffset).isDigit()) { ++expOffset; foundExpDigit = true; } if (!foundExpDigit) return newOffset; return expOffset; } MatchResult HlCChar::doMatch(const QString& text, int offset, const QStringList&) const { if (text.size() < offset + 3) return offset; if (text.at(offset) != QLatin1Char('\'') || text.at(offset + 1) == QLatin1Char('\'')) return offset; auto newOffset = matchEscapedChar(text, offset + 1); if (newOffset == offset + 1) { if (text.at(newOffset) == QLatin1Char('\\')) return offset; else ++newOffset; } if (newOffset >= text.size()) return offset; if (text.at(newOffset) == QLatin1Char('\'')) return newOffset + 1; return offset; } MatchResult HlCHex::doMatch(const QString& text, int offset, const QStringList&) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; if (text.size() < offset + 3) return offset; if (text.at(offset) != QLatin1Char('0') || (text.at(offset + 1) != QLatin1Char('x') && text.at(offset + 1) != QLatin1Char('X'))) return offset; if (!isHexChar(text.at(offset + 2))) return offset; offset += 3; while (offset < text.size() && isHexChar(text.at(offset))) ++offset; // TODO Kate matches U/L suffix, QtC does not? return offset; } MatchResult HlCOct::doMatch(const QString& text, int offset, const QStringList&) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; if (text.size() < offset + 2) return offset; if (text.at(offset) != QLatin1Char('0')) return offset; if (!isOctalChar(text.at(offset + 1))) return offset; offset += 2; while (offset < text.size() && isOctalChar(text.at(offset))) ++offset; return offset; } MatchResult HlCStringChar::doMatch(const QString& text, int offset, const QStringList&) const { return matchEscapedChar(text, offset); } QString IncludeRules::contextName() const { return m_contextName; } QString IncludeRules::definitionName() const { return m_defName; } bool IncludeRules::includeAttribute() const { return m_includeAttribute; } bool IncludeRules::doLoad(QXmlStreamReader& reader) { const auto s = reader.attributes().value(QLatin1String("context")); const auto split = s.split(QLatin1String("##"), QString::KeepEmptyParts); if (split.isEmpty()) return false; m_contextName = split.at(0).toString(); if (split.size() > 1) m_defName = split.at(1).toString(); m_includeAttribute = Xml::attrToBool(reader.attributes().value(QLatin1String("includeAttrib"))); return !m_contextName.isEmpty() || !m_defName.isEmpty(); } MatchResult IncludeRules::doMatch(const QString& text, int offset, const QStringList&) const { Q_UNUSED(text); qCWarning(Log) << "Unresolved include rule for" << m_contextName << "##" << m_defName; return offset; } MatchResult Int::doMatch(const QString& text, int offset, const QStringList &) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; while(offset < text.size() && text.at(offset).isDigit()) ++offset; return offset; } bool KeywordListRule::doLoad(QXmlStreamReader& reader) { - m_listName = reader.attributes().value(QLatin1String("String")).toString(); + /** + * get our keyword list, if not found => bail out + */ + auto defData = DefinitionData::get(definition()); + m_keywordList = defData->keywordList(reader.attributes().value(QLatin1String("String")).toString()); + if (!m_keywordList) { + return false; + } + + /** + * we might overwrite the case sensitivity + * then we need to init the list for lookup of that sensitivity setting + */ if (reader.attributes().hasAttribute(QLatin1String("insensitive"))) { m_hasCaseSensitivityOverride = true; m_caseSensitivityOverride = Xml::attrToBool(reader.attributes().value(QLatin1String("insensitive"))) ? Qt::CaseInsensitive : Qt::CaseSensitive; + m_keywordList->initLookupForCaseSensitivity(m_caseSensitivityOverride); } else { m_hasCaseSensitivityOverride = false; } - return !m_listName.isEmpty(); + + return !m_keywordList->isEmpty(); } MatchResult KeywordListRule::doMatch(const QString& text, int offset, const QStringList&) const { - if (m_keywordList.isEmpty()) { - const auto def = definition(); - Q_ASSERT(def.isValid()); - auto defData = DefinitionData::get(def); - m_keywordList = defData->keywordList(m_listName); - } - auto newOffset = offset; while (text.size() > newOffset && !isWordDelimiter(text.at(newOffset))) ++newOffset; if (newOffset == offset) return offset; if (m_hasCaseSensitivityOverride) { - if (m_keywordList.contains(text.midRef(offset, newOffset - offset), m_caseSensitivityOverride)) + if (m_keywordList->contains(text.midRef(offset, newOffset - offset), m_caseSensitivityOverride)) return newOffset; } else { - if (m_keywordList.contains(text.midRef(offset, newOffset - offset))) + if (m_keywordList->contains(text.midRef(offset, newOffset - offset))) return newOffset; } // we don't match, but we can skip until newOffset as we can't start a keyword in-between return MatchResult(offset, newOffset); } bool LineContinue::doLoad(QXmlStreamReader& reader) { const auto s = reader.attributes().value(QStringLiteral("char")); if (s.isEmpty()) m_char = QLatin1Char('\\'); else m_char = s.at(0); return true; } MatchResult LineContinue::doMatch(const QString& text, int offset, const QStringList&) const { if (offset == text.size() - 1 && text.at(offset) == m_char) return offset + 1; return offset; } bool RangeDetect::doLoad(QXmlStreamReader& reader) { const auto s1 = reader.attributes().value(QStringLiteral("char")); const auto s2 = reader.attributes().value(QStringLiteral("char1")); if (s1.isEmpty() || s2.isEmpty()) return false; m_begin = s1.at(0); m_end = s2.at(0); return true; } MatchResult RangeDetect::doMatch(const QString& text, int offset, const QStringList&) const { if (text.size() - offset < 2) return offset; if (text.at(offset) != m_begin) return offset; auto newOffset = offset + 1; while (newOffset < text.size()) { if (text.at(newOffset) == m_end) return newOffset + 1; ++newOffset; } return offset; } bool RegExpr::doLoad(QXmlStreamReader& reader) { m_regexp.setPattern(reader.attributes().value(QStringLiteral("String")).toString()); const auto isMinimal = Xml::attrToBool(reader.attributes().value(QStringLiteral("minimal"))); const auto isCaseInsensitive = Xml::attrToBool(reader.attributes().value(QStringLiteral("insensitive"))); m_regexp.setPatternOptions( (isMinimal ? QRegularExpression::InvertedGreedinessOption : QRegularExpression::NoPatternOption) | (isCaseInsensitive ? QRegularExpression::CaseInsensitiveOption : QRegularExpression::NoPatternOption)); // optimize the pattern for the non-dynamic case, we use them OFTEN m_dynamic = Xml::attrToBool(reader.attributes().value(QStringLiteral("dynamic"))); if (!m_dynamic) { m_regexp.optimize(); } // always using m_regexp.isValid() would be better, but parses the regexp and thus is way too expensive for release builds Q_ASSERT(m_regexp.isValid()); return !m_regexp.pattern().isEmpty(); } MatchResult RegExpr::doMatch(const QString& text, int offset, const QStringList &captures) const { /** * for dynamic case: create new pattern with right instantiation */ const auto ®exp = m_dynamic ? QRegularExpression(replaceCaptures(m_regexp.pattern(), captures, true), m_regexp.patternOptions()) : m_regexp; - auto result = regexp.match(text, offset, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption); - if (result.capturedStart() == offset) - return MatchResult(offset + result.capturedLength(), result.capturedTexts()); + /** + * match the pattern + */ + const auto result = regexp.match(text, offset, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption); + if (result.capturedStart() == offset) { + /** + * we only need to compute the captured texts if we have real capture groups + * highlightings should only address %1..%.., see e.g. replaceCaptures + * DetectChar ignores %0, too + */ + if (result.lastCapturedIndex() > 0) { + return MatchResult(offset + result.capturedLength(), result.capturedTexts()); + } + + /** + * else: ignore the implicit 0 group we always capture, no need to allocate stuff for that + */ + return MatchResult(offset + result.capturedLength()); + } + + /** + * no match + */ return MatchResult(offset, result.capturedStart()); } bool StringDetect::doLoad(QXmlStreamReader& reader) { m_string = reader.attributes().value(QStringLiteral("String")).toString(); m_caseSensitivity = Xml::attrToBool(reader.attributes().value(QStringLiteral("insensitive"))) ? Qt::CaseInsensitive : Qt::CaseSensitive; m_dynamic = Xml::attrToBool(reader.attributes().value(QStringLiteral("dynamic"))); return !m_string.isEmpty(); } MatchResult StringDetect::doMatch(const QString& text, int offset, const QStringList &captures) const { /** * for dynamic case: create new pattern with right instantiation */ const auto &pattern = m_dynamic ? replaceCaptures(m_string, captures, false) : m_string; if (text.midRef(offset, pattern.size()).compare(pattern, m_caseSensitivity) == 0) return offset + pattern.size(); return offset; } bool WordDetect::doLoad(QXmlStreamReader& reader) { m_word = reader.attributes().value(QStringLiteral("String")).toString(); m_caseSensitivity = Xml::attrToBool(reader.attributes().value(QStringLiteral("insensitive"))) ? Qt::CaseInsensitive : Qt::CaseSensitive; return !m_word.isEmpty(); } MatchResult WordDetect::doMatch(const QString& text, int offset, const QStringList &) const { if (text.size() - offset < m_word.size()) return offset; if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; if (text.midRef(offset, m_word.size()).compare(m_word, m_caseSensitivity) != 0) return offset; if (text.size() == offset + m_word.size() || isWordDelimiter(text.at(offset + m_word.size()))) return offset + m_word.size(); return offset; } diff --git a/src/lib/rule_p.h b/src/lib/rule_p.h index 881deb2..676a3aa 100644 --- a/src/lib/rule_p.h +++ b/src/lib/rule_p.h @@ -1,270 +1,269 @@ /* Copyright (C) 2016 Volker Krause Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef KSYNTAXHIGHLIGHTING_RULE_P_H #define KSYNTAXHIGHLIGHTING_RULE_P_H #include "contextswitch_p.h" #include "definition.h" #include "definitionref_p.h" #include "foldingregion.h" #include "format.h" #include "keywordlist_p.h" #include "matchresult_p.h" #include #include #include #include class QXmlStreamReader; namespace KSyntaxHighlighting { class Rule { public: Rule() = default; virtual ~Rule() = default; typedef std::shared_ptr Ptr; Definition definition() const; void setDefinition(const Definition &def); const Format &attributeFormat() const { return m_attributeFormat; } ContextSwitch context() const; bool isLookAhead() const; bool firstNonSpace() const; int requiredColumn() const; FoldingRegion beginRegion() const; FoldingRegion endRegion() const; bool load(QXmlStreamReader &reader); void resolveContext(); void resolveAttributeFormat(Context *lookupContext); MatchResult match(const QString &text, int offset, const QStringList &captures) const; static Rule::Ptr create(const QStringRef &name); protected: virtual bool doLoad(QXmlStreamReader &reader); virtual MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const = 0; bool isWordDelimiter(QChar c) const; private: Q_DISABLE_COPY(Rule) DefinitionRef m_def; QString m_attribute; Format m_attributeFormat; ContextSwitch m_context; QVector m_subRules; int m_column = -1; FoldingRegion m_beginRegion; FoldingRegion m_endRegion; bool m_firstNonSpace = false; bool m_lookAhead = false; // cache for DefinitionData::wordDelimiters, is accessed VERY often QStringRef m_wordDelimiter; }; class AnyChar : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; private: QString m_chars; }; class DetectChar : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList &captures) const override; private: QChar m_char; bool m_dynamic = false; int m_captureIndex = 0; }; class Detect2Char : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList &captures) const override; private: QChar m_char1; QChar m_char2; }; class DetectIdentifier : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class DetectSpaces : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class Float : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class IncludeRules : public Rule { public: QString contextName() const; QString definitionName() const; bool includeAttribute() const; protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; private: QString m_contextName; QString m_defName; bool m_includeAttribute; }; class Int : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList &captures) const override; }; class HlCChar : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class HlCHex : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class HlCOct : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class HlCStringChar : public Rule { protected: MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; }; class KeywordListRule : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; private: - QString m_listName; - mutable KeywordList m_keywordList; + KeywordList *m_keywordList; bool m_hasCaseSensitivityOverride; Qt::CaseSensitivity m_caseSensitivityOverride; }; class LineContinue : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; private: QChar m_char; }; class RangeDetect : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList&) const override; private: QChar m_begin; QChar m_end; }; class RegExpr : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList &captures) const override; private: QRegularExpression m_regexp; bool m_dynamic = false; }; class StringDetect : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList &captures) const override; private: QString m_string; Qt::CaseSensitivity m_caseSensitivity; bool m_dynamic = false; }; class WordDetect : public Rule { protected: bool doLoad(QXmlStreamReader & reader) override; MatchResult doMatch(const QString & text, int offset, const QStringList &captures) const override; private: QString m_word; Qt::CaseSensitivity m_caseSensitivity; }; } #endif // KSYNTAXHIGHLIGHTING_RULE_P_H