diff --git a/src/lib/abstracthighlighter.cpp b/src/lib/abstracthighlighter.cpp index ec372fb..b0f5106 100644 --- a/src/lib/abstracthighlighter.cpp +++ b/src/lib/abstracthighlighter.cpp @@ -1,379 +1,391 @@ /* Copyright (C) 2016 Volker Krause Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "abstracthighlighter.h" #include "abstracthighlighter_p.h" #include "context_p.h" #include "definition_p.h" #include "foldingregion.h" #include "format.h" #include "ksyntaxhighlighting_logging.h" #include "repository.h" #include "rule_p.h" #include "state.h" #include "state_p.h" #include "theme.h" using namespace KSyntaxHighlighting; AbstractHighlighterPrivate::AbstractHighlighterPrivate() { } AbstractHighlighterPrivate::~AbstractHighlighterPrivate() { } void AbstractHighlighterPrivate::ensureDefinitionLoaded() { auto defData = DefinitionData::get(m_definition); if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) { qCDebug(Log) << "Definition became invalid, trying re-lookup."; m_definition = defData->repo->definitionForName(m_definition.name()); defData = DefinitionData::get(m_definition); } if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) qCCritical(Log) << "Repository got deleted while a highlighter is still active!"; if (m_definition.isValid()) defData->load(); } AbstractHighlighter::AbstractHighlighter() : d_ptr(new AbstractHighlighterPrivate) { } AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd) : d_ptr(dd) { } AbstractHighlighter::~AbstractHighlighter() { delete d_ptr; } Definition AbstractHighlighter::definition() const { return d_ptr->m_definition; } void AbstractHighlighter::setDefinition(const Definition &def) { Q_D(AbstractHighlighter); d->m_definition = def; } Theme AbstractHighlighter::theme() const { Q_D(const AbstractHighlighter); return d->m_theme; } void AbstractHighlighter::setTheme(const Theme &theme) { Q_D(AbstractHighlighter); d->m_theme = theme; } /** * Returns the index of the first non-space character. If the line is empty, * or only contains white spaces, text.size() is returned. 
*/ static inline int firstNonSpaceChar(const QString &text) { for (int i = 0; i < text.length(); ++i) { if (!text[i].isSpace()) { return i; } } return text.size(); } State AbstractHighlighter::highlightLine(const QString &text, const State &state) { Q_D(AbstractHighlighter); // verify definition, deal with no highlighting being enabled d->ensureDefinitionLoaded(); const auto defData = DefinitionData::get(d->m_definition); if (!d->m_definition.isValid() || !defData->isLoaded()) { applyFormat(0, text.size(), Format()); return State(); } // verify/initialize state auto newState = state; auto stateData = StateData::get(newState); const DefinitionRef currentDefRef(d->m_definition); if (!stateData->isEmpty() && (stateData->m_defRef != currentDefRef)) { qCDebug(Log) << "Got invalid state, resetting."; stateData->clear(); } if (stateData->isEmpty()) { stateData->push(defData->initialContext(), QStringList()); stateData->m_defRef = currentDefRef; } // process empty lines if (text.isEmpty()) { /** * handle line empty context switches * guard against endless loops * see https://phabricator.kde.org/D18509 */ int endlessLoopingCounter = 0; while (!stateData->topContext()->lineEmptyContext().isStay() || (stateData->topContext()->lineEmptyContext().isStay() && !stateData->topContext()->lineEndContext().isStay())) { /** * line empty context switches */ if (!stateData->topContext()->lineEmptyContext().isStay()) { if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList())) { /** * end when trying to #pop the main context */ break; } /** * line end context switches only when lineEmptyContext is #stay. 
This avoids * skipping empty lines after a line continuation character (see bug 405903) */ } else if (!stateData->topContext()->lineEndContext().isStay() && !d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList())) break; // guard against endless loops ++endlessLoopingCounter; if (endlessLoopingCounter > 1024) { qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line."; break; } } auto context = stateData->topContext(); applyFormat(0, 0, context->attributeFormat()); return newState; } int offset = 0, beginOffset = 0; bool lineContinuation = false; QHash skipOffsets; + QStringList capturesForLastDynamicSkipOffset; /** * current active format * stored as pointer to avoid deconstruction/constructions inside the internal loop * the pointers are stable, the formats are either in the contexts or rules */ auto currentFormat = &stateData->topContext()->attributeFormat(); /** * cached first non-space character, needs to be computed if < 0 */ int firstNonSpace = -1; int lastOffset = offset; int endlessLoopingCounter = 0; do { /** * avoid that we loop endless for some broken hl definitions */ if (lastOffset == offset) { ++endlessLoopingCounter; if (endlessLoopingCounter > 1024) { qCDebug(Log) << "Endless state transitions, aborting highlighting of line."; break; } } else { // ensure we made progress, clear the endlessLoopingCounter Q_ASSERT(offset > lastOffset); lastOffset = offset; endlessLoopingCounter = 0; } /** * try to match all rules in the context in order of declaration in XML */ bool isLookAhead = false; int newOffset = 0; const Format *newFormat = nullptr; for (const auto &rule : stateData->topContext()->rules()) { /** * filter out rules that require a specific column */ if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) { continue; } /** * filter out rules that only match for leading whitespace */ if (rule->firstNonSpace()) { /** * compute the first non-space lazy 
* avoids computing it for contexts without any such rules */ if (firstNonSpace < 0) { firstNonSpace = firstNonSpaceChar(text); } /** * can we skip? */ if (offset > firstNonSpace) { continue; } } /** * shall we skip application of this rule? two cases: * - rule can't match at all => currentSkipOffset < 0 * - rule will only match for some higher offset => currentSkipOffset > offset + * + * we need to invalidate this if we are dynamic and have different captures then last time */ + if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) { + skipOffsets.clear(); + } const auto currentSkipOffset = skipOffsets.value(rule.get()); if (currentSkipOffset < 0 || currentSkipOffset > offset) continue; const auto newResult = rule->doMatch(text, offset, stateData->topCaptures()); newOffset = newResult.offset(); /** * update skip offset if new one rules out any later match or is larger than current one */ - if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) + if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) { skipOffsets.insert(rule.get(), newResult.skipOffset()); + // remember new captures, if dynamic to enforce proper reset above on change! + if (rule->isDynamic()) { + capturesForLastDynamicSkipOffset = stateData->topCaptures(); + } + } + if (newOffset <= offset) continue; /** * apply folding. * special cases: * - rule with endRegion + beginRegion: in endRegion, the length is 0 * - rule with lookAhead: length is 0 */ if (rule->endRegion().isValid() && rule->beginRegion().isValid()) applyFolding(offset, 0, rule->endRegion()); else if (rule->endRegion().isValid()) applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion()); if (rule->beginRegion().isValid()) applyFolding(offset, rule->isLookAhead() ? 
0 : newOffset - offset, rule->beginRegion()); if (rule->isLookAhead()) { Q_ASSERT(!rule->context().isStay()); d->switchContext(stateData, rule->context(), newResult.captures()); isLookAhead = true; break; } d->switchContext(stateData, rule->context(), newResult.captures()); newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat(); if (newOffset == text.size() && std::dynamic_pointer_cast(rule)) lineContinuation = true; break; } if (isLookAhead) continue; if (newOffset <= offset) { // no matching rule if (stateData->topContext()->fallthrough()) { d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList()); continue; } newOffset = offset + 1; newFormat = &stateData->topContext()->attributeFormat(); } /** * if we arrive here, some new format has to be set! */ Q_ASSERT(newFormat); /** * on format change, apply the last one and switch to new one */ if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) { if (offset > 0) applyFormat(beginOffset, offset - beginOffset, *currentFormat); beginOffset = offset; currentFormat = newFormat; } /** * we must have made progress if we arrive here! 
*/ Q_ASSERT(newOffset > offset); offset = newOffset; } while (offset < text.size()); /** * apply format for remaining text, if any */ if (beginOffset < offset) applyFormat(beginOffset, text.size() - beginOffset, *currentFormat); /** * handle line end context switches * guard against endless loops * see https://phabricator.kde.org/D18509 */ { int endlessLoopingCounter = 0; while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) { if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList())) break; // guard against endless loops ++endlessLoopingCounter; if (endlessLoopingCounter > 1024) { qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line."; break; } } } return newState; } bool AbstractHighlighterPrivate::switchContext(StateData *data, const ContextSwitch &contextSwitch, const QStringList &captures) { // kill as many items as requested from the stack, will always keep the initial context alive! 
const bool initialContextSurvived = data->pop(contextSwitch.popCount()); // if we have a new context to add, push it // then we always "succeed" if (contextSwitch.context()) { data->push(contextSwitch.context(), captures); return true; } // else we abort, if we did try to pop the initial context return initialContextSurvived; } void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region) { Q_UNUSED(offset); Q_UNUSED(length); Q_UNUSED(region); } diff --git a/src/lib/rule.cpp b/src/lib/rule.cpp index b953f4b..0d7d843 100644 --- a/src/lib/rule.cpp +++ b/src/lib/rule.cpp @@ -1,648 +1,647 @@ /* Copyright (C) 2016 Volker Krause Copyright (C) 2018 Christoph Cullmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "context_p.h" #include "definition_p.h" #include "ksyntaxhighlighting_logging.h" #include "rule_p.h" #include "xml_p.h" #include #include using namespace KSyntaxHighlighting; static bool isOctalChar(QChar c) { return c.isNumber() && c != QLatin1Char('9') && c != QLatin1Char('8'); } static bool isHexChar(QChar c) { return c.isNumber() || c == QLatin1Char('a') || c == QLatin1Char('A') || c == QLatin1Char('b') || c == QLatin1Char('B') || c == QLatin1Char('c') || c == QLatin1Char('C') || c == QLatin1Char('d') || c == QLatin1Char('D') || c == QLatin1Char('e') || c == QLatin1Char('E') || c == QLatin1Char('f') || c == QLatin1Char('F'); } static int matchEscapedChar(const QString &text, int offset) { if (text.at(offset) != QLatin1Char('\\') || text.size() < offset + 2) return offset; const auto c = text.at(offset + 1); static const auto controlChars = QStringLiteral("abefnrtv\"'?\\"); if (controlChars.contains(c)) return offset + 2; // hex encoded character if (c == QLatin1Char('x')) { auto newOffset = offset + 2; for (int i = 0; i < 2 && newOffset + i < text.size(); ++i, ++newOffset) { if (!isHexChar(text.at(newOffset))) break; } if (newOffset == offset + 2) return offset; return newOffset; } // octal encoding, simple \0 is OK, too, unlike simple \x above if (isOctalChar(c)) { auto newOffset = offset + 2; for (int i = 0; i < 2 && newOffset + i < text.size(); ++i, ++newOffset) { if (!isOctalChar(text.at(newOffset))) break; } return newOffset; } return offset; } static QString replaceCaptures(const QString &pattern, const QStringList &captures, bool quote) { auto result = pattern; for (int i = captures.size() - 1; i >= 1; --i) { result.replace(QLatin1Char('%') + QString::number(i), quote ? 
QRegularExpression::escape(captures.at(i)) : captures.at(i)); } return result; } Definition Rule::definition() const { return m_def.definition(); } void Rule::setDefinition(const Definition &def) { m_def = def; } bool Rule::load(QXmlStreamReader &reader) { Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); m_attribute = reader.attributes().value(QStringLiteral("attribute")).toString(); if (reader.name() != QLatin1String("IncludeRules")) // IncludeRules uses this with a different semantic m_context.parse(reader.attributes().value(QStringLiteral("context"))); m_firstNonSpace = Xml::attrToBool(reader.attributes().value(QStringLiteral("firstNonSpace"))); m_lookAhead = Xml::attrToBool(reader.attributes().value(QStringLiteral("lookAhead"))); bool colOk = false; m_column = reader.attributes().value(QStringLiteral("column")).toInt(&colOk); if (!colOk) m_column = -1; auto regionName = reader.attributes().value(QLatin1String("beginRegion")); if (!regionName.isEmpty()) m_beginRegion = FoldingRegion(FoldingRegion::Begin, DefinitionData::get(m_def.definition())->foldingRegionId(regionName.toString())); regionName = reader.attributes().value(QLatin1String("endRegion")); if (!regionName.isEmpty()) m_endRegion = FoldingRegion(FoldingRegion::End, DefinitionData::get(m_def.definition())->foldingRegionId(regionName.toString())); auto result = doLoad(reader); if (m_lookAhead && m_context.isStay()) result = false; // be done with this rule, skip all subelements, e.g. 
no longer supported sub-rules reader.skipCurrentElement(); return result; } void Rule::resolveContext() { m_context.resolve(m_def.definition()); // cache for DefinitionData::wordDelimiters, is accessed VERY often m_wordDelimiter = &DefinitionData::get(m_def.definition())->wordDelimiters; } void Rule::resolveAttributeFormat(Context *lookupContext) { /** * try to get our format from the definition we stem from */ if (!m_attribute.isEmpty()) { m_attributeFormat = DefinitionData::get(definition())->formatByName(m_attribute); if (!m_attributeFormat.isValid()) { qCWarning(Log) << "Rule: Unknown format" << m_attribute << "in context" << lookupContext->name() << "of definition" << definition().name(); } } } bool Rule::doLoad(QXmlStreamReader &reader) { Q_UNUSED(reader); return true; } Rule::Ptr Rule::create(const QStringRef &name) { Rule *rule = nullptr; if (name == QLatin1String("AnyChar")) rule = new AnyChar; else if (name == QLatin1String("DetectChar")) rule = new DetectChar; else if (name == QLatin1String("Detect2Chars")) rule = new Detect2Char; else if (name == QLatin1String("DetectIdentifier")) rule = new DetectIdentifier; else if (name == QLatin1String("DetectSpaces")) rule = new DetectSpaces; else if (name == QLatin1String("Float")) rule = new Float; else if (name == QLatin1String("Int")) rule = new Int; else if (name == QLatin1String("HlCChar")) rule = new HlCChar; else if (name == QLatin1String("HlCHex")) rule = new HlCHex; else if (name == QLatin1String("HlCOct")) rule = new HlCOct; else if (name == QLatin1String("HlCStringChar")) rule = new HlCStringChar; else if (name == QLatin1String("IncludeRules")) rule = new IncludeRules; else if (name == QLatin1String("keyword")) rule = new KeywordListRule; else if (name == QLatin1String("LineContinue")) rule = new LineContinue; else if (name == QLatin1String("RangeDetect")) rule = new RangeDetect; else if (name == QLatin1String("RegExpr")) rule = new RegExpr; else if (name == QLatin1String("StringDetect")) rule = new 
StringDetect; else if (name == QLatin1String("WordDetect")) rule = new WordDetect; else qCWarning(Log) << "Unknown rule type:" << name; return Ptr(rule); } bool Rule::isWordDelimiter(QChar c) const { // perf tells contains is MUCH faster than binary search here, very short array return m_wordDelimiter.contains(c); } bool AnyChar::doLoad(QXmlStreamReader &reader) { m_chars = reader.attributes().value(QStringLiteral("String")).toString(); if (m_chars.size() == 1) qCDebug(Log) << "AnyChar rule with just one char: use DetectChar instead."; return !m_chars.isEmpty(); } MatchResult AnyChar::doMatch(const QString &text, int offset, const QStringList &) const { if (m_chars.contains(text.at(offset))) return offset + 1; return offset; } bool DetectChar::doLoad(QXmlStreamReader &reader) { const auto s = reader.attributes().value(QStringLiteral("char")); if (s.isEmpty()) return false; m_char = s.at(0); m_dynamic = Xml::attrToBool(reader.attributes().value(QStringLiteral("dynamic"))); if (m_dynamic) { m_captureIndex = m_char.digitValue(); } return true; } MatchResult DetectChar::doMatch(const QString &text, int offset, const QStringList &captures) const { if (m_dynamic) { if (m_captureIndex == 0 || captures.size() <= m_captureIndex || captures.at(m_captureIndex).isEmpty()) return offset; if (text.at(offset) == captures.at(m_captureIndex).at(0)) return offset + 1; return offset; } if (text.at(offset) == m_char) return offset + 1; return offset; } bool Detect2Char::doLoad(QXmlStreamReader &reader) { const auto s1 = reader.attributes().value(QStringLiteral("char")); const auto s2 = reader.attributes().value(QStringLiteral("char1")); if (s1.isEmpty() || s2.isEmpty()) return false; m_char1 = s1.at(0); m_char2 = s2.at(0); return true; } MatchResult Detect2Char::doMatch(const QString &text, int offset, const QStringList &) const { if (text.size() - offset < 2) return offset; if (text.at(offset) == m_char1 && text.at(offset + 1) == m_char2) return offset + 2; return offset; } 
MatchResult DetectIdentifier::doMatch(const QString &text, int offset, const QStringList &) const { if (!text.at(offset).isLetter() && text.at(offset) != QLatin1Char('_')) return offset; for (int i = offset + 1; i < text.size(); ++i) { const auto c = text.at(i); if (!c.isLetterOrNumber() && c != QLatin1Char('_')) return i; } return text.size(); } MatchResult DetectSpaces::doMatch(const QString &text, int offset, const QStringList &) const { while (offset < text.size() && text.at(offset).isSpace()) ++offset; return offset; } MatchResult Float::doMatch(const QString &text, int offset, const QStringList &) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; auto newOffset = offset; while (newOffset < text.size() && text.at(newOffset).isDigit()) ++newOffset; if (newOffset >= text.size() || text.at(newOffset) != QLatin1Char('.')) return offset; ++newOffset; while (newOffset < text.size() && text.at(newOffset).isDigit()) ++newOffset; if (newOffset == offset + 1) // we only found a decimal point return offset; auto expOffset = newOffset; if (expOffset >= text.size() || (text.at(expOffset) != QLatin1Char('e') && text.at(expOffset) != QLatin1Char('E'))) return newOffset; ++expOffset; if (expOffset < text.size() && (text.at(expOffset) == QLatin1Char('+') || text.at(expOffset) == QLatin1Char('-'))) ++expOffset; bool foundExpDigit = false; while (expOffset < text.size() && text.at(expOffset).isDigit()) { ++expOffset; foundExpDigit = true; } if (!foundExpDigit) return newOffset; return expOffset; } MatchResult HlCChar::doMatch(const QString &text, int offset, const QStringList &) const { if (text.size() < offset + 3) return offset; if (text.at(offset) != QLatin1Char('\'') || text.at(offset + 1) == QLatin1Char('\'')) return offset; auto newOffset = matchEscapedChar(text, offset + 1); if (newOffset == offset + 1) { if (text.at(newOffset) == QLatin1Char('\\')) return offset; else ++newOffset; } if (newOffset >= text.size()) return offset; if 
(text.at(newOffset) == QLatin1Char('\'')) return newOffset + 1; return offset; } MatchResult HlCHex::doMatch(const QString &text, int offset, const QStringList &) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; if (text.size() < offset + 3) return offset; if (text.at(offset) != QLatin1Char('0') || (text.at(offset + 1) != QLatin1Char('x') && text.at(offset + 1) != QLatin1Char('X'))) return offset; if (!isHexChar(text.at(offset + 2))) return offset; offset += 3; while (offset < text.size() && isHexChar(text.at(offset))) ++offset; // TODO Kate matches U/L suffix, QtC does not? return offset; } MatchResult HlCOct::doMatch(const QString &text, int offset, const QStringList &) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; if (text.size() < offset + 2) return offset; if (text.at(offset) != QLatin1Char('0')) return offset; if (!isOctalChar(text.at(offset + 1))) return offset; offset += 2; while (offset < text.size() && isOctalChar(text.at(offset))) ++offset; return offset; } MatchResult HlCStringChar::doMatch(const QString &text, int offset, const QStringList &) const { return matchEscapedChar(text, offset); } QString IncludeRules::contextName() const { return m_contextName; } QString IncludeRules::definitionName() const { return m_defName; } bool IncludeRules::includeAttribute() const { return m_includeAttribute; } bool IncludeRules::doLoad(QXmlStreamReader &reader) { const auto s = reader.attributes().value(QLatin1String("context")); #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0) const auto split = s.split(QLatin1String("##"), QString::KeepEmptyParts); #else const auto split = s.split(QLatin1String("##"), Qt::KeepEmptyParts); #endif if (split.isEmpty()) return false; m_contextName = split.at(0).toString(); if (split.size() > 1) m_defName = split.at(1).toString(); m_includeAttribute = Xml::attrToBool(reader.attributes().value(QLatin1String("includeAttrib"))); return !m_contextName.isEmpty() || 
!m_defName.isEmpty(); } MatchResult IncludeRules::doMatch(const QString &text, int offset, const QStringList &) const { Q_UNUSED(text); qCWarning(Log) << "Unresolved include rule for" << m_contextName << "##" << m_defName; return offset; } MatchResult Int::doMatch(const QString &text, int offset, const QStringList &) const { if (offset > 0 && !isWordDelimiter(text.at(offset - 1))) return offset; while (offset < text.size() && text.at(offset).isDigit()) ++offset; return offset; } bool KeywordListRule::doLoad(QXmlStreamReader &reader) { /** * get our keyword list, if not found => bail out */ auto defData = DefinitionData::get(definition()); m_keywordList = defData->keywordList(reader.attributes().value(QLatin1String("String")).toString()); if (!m_keywordList) { return false; } /** * we might overwrite the case sensitivity * then we need to init the list for lookup of that sensitivity setting */ if (reader.attributes().hasAttribute(QLatin1String("insensitive"))) { m_hasCaseSensitivityOverride = true; m_caseSensitivityOverride = Xml::attrToBool(reader.attributes().value(QLatin1String("insensitive"))) ? 
Qt::CaseInsensitive : Qt::CaseSensitive; m_keywordList->initLookupForCaseSensitivity(m_caseSensitivityOverride); } else { m_hasCaseSensitivityOverride = false; } return !m_keywordList->isEmpty(); } MatchResult KeywordListRule::doMatch(const QString &text, int offset, const QStringList &) const { auto newOffset = offset; while (text.size() > newOffset && !isWordDelimiter(text.at(newOffset))) ++newOffset; if (newOffset == offset) return offset; if (m_hasCaseSensitivityOverride) { if (m_keywordList->contains(text.midRef(offset, newOffset - offset), m_caseSensitivityOverride)) return newOffset; } else { if (m_keywordList->contains(text.midRef(offset, newOffset - offset))) return newOffset; } // we don't match, but we can skip until newOffset as we can't start a keyword in-between return MatchResult(offset, newOffset); } bool LineContinue::doLoad(QXmlStreamReader &reader) { const auto s = reader.attributes().value(QStringLiteral("char")); if (s.isEmpty()) m_char = QLatin1Char('\\'); else m_char = s.at(0); return true; } MatchResult LineContinue::doMatch(const QString &text, int offset, const QStringList &) const { if (offset == text.size() - 1 && text.at(offset) == m_char) return offset + 1; return offset; } bool RangeDetect::doLoad(QXmlStreamReader &reader) { const auto s1 = reader.attributes().value(QStringLiteral("char")); const auto s2 = reader.attributes().value(QStringLiteral("char1")); if (s1.isEmpty() || s2.isEmpty()) return false; m_begin = s1.at(0); m_end = s2.at(0); return true; } MatchResult RangeDetect::doMatch(const QString &text, int offset, const QStringList &) const { if (text.size() - offset < 2) return offset; if (text.at(offset) != m_begin) return offset; auto newOffset = offset + 1; while (newOffset < text.size()) { if (text.at(newOffset) == m_end) return newOffset + 1; ++newOffset; } return offset; } bool RegExpr::doLoad(QXmlStreamReader &reader) { m_regexp.setPattern(reader.attributes().value(QStringLiteral("String")).toString()); const auto 
isMinimal = Xml::attrToBool(reader.attributes().value(QStringLiteral("minimal"))); const auto isCaseInsensitive = Xml::attrToBool(reader.attributes().value(QStringLiteral("insensitive"))); m_regexp.setPatternOptions((isMinimal ? QRegularExpression::InvertedGreedinessOption : QRegularExpression::NoPatternOption) | (isCaseInsensitive ? QRegularExpression::CaseInsensitiveOption : QRegularExpression::NoPatternOption)); // optimize the pattern for the non-dynamic case, we use them OFTEN m_dynamic = Xml::attrToBool(reader.attributes().value(QStringLiteral("dynamic"))); if (!m_dynamic) { m_regexp.optimize(); } // always using m_regexp.isValid() would be better, but parses the regexp and thus is way too expensive for release builds if (Log().isDebugEnabled()) { if (!m_regexp.isValid()) qCDebug(Log) << "Invalid regexp:" << m_regexp.pattern(); } return !m_regexp.pattern().isEmpty(); } MatchResult RegExpr::doMatch(const QString &text, int offset, const QStringList &captures) const { /** * for dynamic case: create new pattern with right instantiation */ const auto ®exp = m_dynamic ? QRegularExpression(replaceCaptures(m_regexp.pattern(), captures, true), m_regexp.patternOptions()) : m_regexp; /** * match the pattern */ const auto result = regexp.match(text, offset, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption); if (result.capturedStart() == offset) { /** * we only need to compute the captured texts if we have real capture groups * highlightings should only address %1..%.., see e.g. 
replaceCaptures * DetectChar ignores %0, too */ if (result.lastCapturedIndex() > 0) { return MatchResult(offset + result.capturedLength(), result.capturedTexts()); } /** * else: ignore the implicit 0 group we always capture, no need to allocate stuff for that */ return MatchResult(offset + result.capturedLength()); } /** * no match - * the pattern of a dynamic regex depends on the previous contexts - * so that skipOffset cannot be computed + * we can always compute the skip offset as the highlighter will invalidate the cache for changed captures for dynamic rules! */ - return MatchResult(offset, m_dynamic ? 0 : result.capturedStart()); + return MatchResult(offset, result.capturedStart()); } bool StringDetect::doLoad(QXmlStreamReader &reader) { m_string = reader.attributes().value(QStringLiteral("String")).toString(); m_caseSensitivity = Xml::attrToBool(reader.attributes().value(QStringLiteral("insensitive"))) ? Qt::CaseInsensitive : Qt::CaseSensitive; m_dynamic = Xml::attrToBool(reader.attributes().value(QStringLiteral("dynamic"))); return !m_string.isEmpty(); } MatchResult StringDetect::doMatch(const QString &text, int offset, const QStringList &captures) const { /** * for dynamic case: create new pattern with right instantiation */ const auto &pattern = m_dynamic ? replaceCaptures(m_string, captures, false) : m_string; if (text.midRef(offset, pattern.size()).compare(pattern, m_caseSensitivity) == 0) return offset + pattern.size(); return offset; } bool WordDetect::doLoad(QXmlStreamReader &reader) { m_word = reader.attributes().value(QStringLiteral("String")).toString(); m_caseSensitivity = Xml::attrToBool(reader.attributes().value(QStringLiteral("insensitive"))) ? 
Qt::CaseInsensitive : Qt::CaseSensitive; return !m_word.isEmpty(); } MatchResult WordDetect::doMatch(const QString &text, int offset, const QStringList &) const { if (text.size() - offset < m_word.size()) return offset; /** * detect delimiter characters on the inner and outer boundaries of the string * NOTE: m_word isn't empty */ if (offset > 0 && !isWordDelimiter(text.at(offset - 1)) && !isWordDelimiter(text.at(offset))) return offset; if (text.midRef(offset, m_word.size()).compare(m_word, m_caseSensitivity) != 0) return offset; if (text.size() == offset + m_word.size() || isWordDelimiter(text.at(offset + m_word.size())) || isWordDelimiter(text.at(offset + m_word.size() - 1))) return offset + m_word.size(); return offset; } diff --git a/src/lib/rule_p.h b/src/lib/rule_p.h index 213de9a..3a3dd41 100644 --- a/src/lib/rule_p.h +++ b/src/lib/rule_p.h @@ -1,289 +1,295 @@ /* Copyright (C) 2016 Volker Krause Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

#ifndef KSYNTAXHIGHLIGHTING_RULE_P_H
#define KSYNTAXHIGHLIGHTING_RULE_P_H

#include "contextswitch_p.h"
#include "definition.h"
#include "definitionref_p.h"
#include "foldingregion.h"
#include "format.h"
#include "keywordlist_p.h"
#include "matchresult_p.h"

// NOTE(review): the three include directives below carry no header name.
// Presumably the angle-bracket names were lost when this patch text was
// extracted; QRegularExpression, QString and std::shared_ptr are used further
// down, so their headers are the likely candidates - confirm against upstream.
#include
#include
#include

QT_BEGIN_NAMESPACE
class QXmlStreamReader;
QT_END_NAMESPACE

namespace KSyntaxHighlighting
{
/**
 * Base class of the rule types declared in this header.
 *
 * Every concrete rule has to provide doMatch(), which is pure virtual here.
 * The non-virtual accessors expose the members filled in by load() and the
 * resolve functions; their implementations are not part of this header.
 */
class Rule
{
public:
    Rule() = default;
    virtual ~Rule() = default;

    // NOTE(review): std::shared_ptr is written without a template argument.
    // Presumably it was lost in extraction as well; create() returning
    // Rule::Ptr suggests a shared pointer to Rule - confirm against upstream.
    typedef std::shared_ptr Ptr;

    Definition definition() const;
    void setDefinition(const Definition &def);

    /** Returns the stored attribute format (m_attributeFormat). */
    const Format &attributeFormat() const
    {
        return m_attributeFormat;
    }

    /** Returns the stored context switch (m_context). */
    const ContextSwitch &context() const
    {
        return m_context;
    }

    /** Returns the look-ahead flag (m_lookAhead, false by default). */
    bool isLookAhead() const
    {
        return m_lookAhead;
    }

    /**
     * Added by this patch: returns m_dynamic, the flag this patch moves from
     * DetectChar, RegExpr and StringDetect into Rule (false by default).
     */
+   bool isDynamic() const
+   {
+       return m_dynamic;
+   }
+
    /** Returns the first-non-space flag (m_firstNonSpace, false by default). */
    bool firstNonSpace() const
    {
        return m_firstNonSpace;
    }

    /**
     * Returns m_column, which defaults to -1.
     * NOTE(review): presumably -1 means that no column is required - confirm.
     */
    int requiredColumn() const
    {
        return m_column;
    }

    /** Returns the folding region stored in m_beginRegion. */
    const FoldingRegion &beginRegion() const
    {
        return m_beginRegion;
    }

    /** Returns the folding region stored in m_endRegion. */
    const FoldingRegion &endRegion() const
    {
        return m_endRegion;
    }

    // Declared here, defined elsewhere. NOTE(review): presumably load() reads
    // the common rule data from the XML reader and then calls doLoad() - confirm.
    bool load(QXmlStreamReader &reader);
    void resolveContext();
    void resolveAttributeFormat(Context *lookupContext);

    /**
     * Matching entry point that every concrete rule overrides.
     * WordDetect::doMatch returns the unchanged offset when nothing matches and
     * the offset behind the matched text otherwise.
     * NOTE(review): presumably all rules follow that convention - confirm.
     */
    virtual MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const = 0;

    // NOTE(review): presumably a factory keyed on the rule element name - confirm.
    static Rule::Ptr create(const QStringRef &name);

protected:
    /** Hook for rule specific loading; overridden by rules with own data. */
    virtual bool doLoad(QXmlStreamReader &reader);

    bool isWordDelimiter(QChar c) const;

private:
    Q_DISABLE_COPY(Rule)

    DefinitionRef m_def;
    QString m_attribute;
    Format m_attributeFormat;
    ContextSwitch m_context;
    int m_column = -1;
    FoldingRegion m_beginRegion;
    FoldingRegion m_endRegion;
    bool m_firstNonSpace = false;
    bool m_lookAhead = false;

    // cache for DefinitionData::wordDelimiters, is accessed VERY often
    QStringRef m_wordDelimiter;
+
    // Added by this patch: m_dynamic becomes a protected member of Rule, so
    // the subclasses that used to declare it themselves can still access it.
+protected:
+   bool m_dynamic = false;
};

/**
 * Rule that stores a string of characters in m_chars.
 * NOTE(review): presumably matches any one of these characters - confirm.
 */
class AnyChar : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;

private:
    QString m_chars;
};

/**
 * Rule that stores a single character in m_char.
 * This patch removes its private m_dynamic (now inherited from Rule) and
 * adds m_captureIndex, which defaults to 0.
 * NOTE(review): presumably an index into the captures list - confirm.
 */
class DetectChar : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const override;

private:
    QChar m_char;
-   bool m_dynamic = false;
+   int m_captureIndex = 0;
};

/** Rule that stores two characters, m_char1 and m_char2. */
class Detect2Char : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const override;

private:
    QChar m_char1;
    QChar m_char2;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class DetectIdentifier : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class DetectSpaces : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class Float : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/**
 * Rule that stores a context name, a definition name and an include-attribute
 * flag, each exposed through a public getter.
 */
class IncludeRules : public Rule
{
public:
    QString contextName() const;
    QString definitionName() const;
    bool includeAttribute() const;

protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;

private:
    QString m_contextName;
    QString m_defName;
    // NOTE(review): no in-class initializer; presumably set in doLoad - confirm.
    bool m_includeAttribute;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class Int : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const override;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class HlCChar : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class HlCHex : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class HlCOct : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/** Rule without own data or doLoad override; only doMatch is provided. */
class HlCStringChar : public Rule
{
protected:
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;
};

/**
 * Rule that refers to a KeywordList through a raw pointer and carries an
 * optional case sensitivity override.
 */
class KeywordListRule : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;

private:
    // NOTE(review): none of the three members below has an in-class
    // initializer; presumably all are set in doLoad - confirm. Ownership of
    // the keyword list cannot be told from this header.
    KeywordList *m_keywordList;
    bool m_hasCaseSensitivityOverride;
    Qt::CaseSensitivity m_caseSensitivityOverride;
};

/** Rule that stores a single character in m_char. */
class LineContinue : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;

private:
    QChar m_char;
};

/** Rule that stores a begin character and an end character. */
class RangeDetect : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &) const override;

private:
    QChar m_begin;
    QChar m_end;
};

/**
 * Rule that stores a regular expression in m_regexp.
 * This patch removes its private m_dynamic; the flag now lives in Rule.
 */
class RegExpr : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const override;

private:
    QRegularExpression m_regexp;
-   bool m_dynamic = false;
};

/**
 * Rule that stores a string and a case sensitivity setting.
 * This patch removes its private m_dynamic; the flag now lives in Rule.
 */
class StringDetect : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const override;

private:
    QString m_string;
    // NOTE(review): no in-class initializer; presumably set in doLoad - confirm.
    Qt::CaseSensitivity m_caseSensitivity;
-   bool m_dynamic = false;
};

/** Rule that stores a word and the case sensitivity used to compare it. */
class WordDetect : public Rule
{
protected:
    bool doLoad(QXmlStreamReader &reader) override;
    MatchResult doMatch(const QString &text, int offset, const QStringList &captures) const override;

private:
    QString m_word;
    Qt::CaseSensitivity m_caseSensitivity;
};

}

#endif // KSYNTAXHIGHLIGHTING_RULE_P_H