diff --git a/src/indexer/katehighlightingindexer.cpp b/src/indexer/katehighlightingindexer.cpp
--- a/src/indexer/katehighlightingindexer.cpp
+++ b/src/indexer/katehighlightingindexer.cpp
@@ -32,8 +32,24 @@
 #include
 #endif
 
+#include <algorithm>
+#include <iterator>
+#include <type_traits>
+
 namespace {
 
+namespace Xml {
+
+//! Interprets an XML attribute value as a boolean: true for "1" or for "true"
+//! in any letter case, false for anything else (e.g. an empty reference).
+bool attrToBool(const QStringRef &str)
+{
+    return str == QLatin1String("1")
+        || str.compare(QLatin1String("true"), Qt::CaseInsensitive) == 0;
+}
+
+} // namespace Xml
+
 QStringList readListing(const QString &fileName)
 {
     QFile file(fileName);
@@ -117,13 +133,15 @@
             const QString c = xml.attributes().value(QLatin1String("char")).toString();
             if (c.size() != 1) {
                 qWarning() << hlFilename << "line" << xml.lineNumber() << "'char' must contain exactly one char:" << c;
+                return false;
             }
         }
 
         if (testChar1) {
             const QString c = xml.attributes().value(QLatin1String("char1")).toString();
             if (c.size() != 1) {
                 qWarning() << hlFilename << "line" << xml.lineNumber() << "'char1' must contain exactly one char:" << c;
+                return false;
             }
         }
 
@@ -136,7 +154,7 @@
 {
     if (xml.attributes().hasAttribute(QStringLiteral("lookAhead"))) {
         auto lookAhead = xml.attributes().value(QStringLiteral("lookAhead"));
-        if (lookAhead == QStringLiteral("true")) {
+        if (Xml::attrToBool(lookAhead)) {
             auto context = xml.attributes().value(QStringLiteral("context"));
             if (context == QStringLiteral("#stay")) {
                 qWarning() << hlFilename << "line" << xml.lineNumber() << "Infinite loop: lookAhead with context #stay";
@@ -375,6 +393,245 @@
     QSet m_existingAttributeNames;
 };
 
+//! Proposes to replace StringDetect with DetectChar or Detect2Chars.
+//! Returns false when a suggestion was printed, true otherwise.
+bool suggestForStringDetect(const QString &hlFilename, QXmlStreamReader &xml)
+{
+    if (xml.name() == QLatin1String("StringDetect") && !Xml::attrToBool(xml.attributes().value(QLatin1String("dynamic")))) {
+        if (!Xml::attrToBool(xml.attributes().value(QLatin1String("insensitive")))) {
+            const auto str = xml.attributes().value(QLatin1String("String")).toString();
+            if (str.size() == 1) {
+                qWarning() << hlFilename << "line" << xml.lineNumber() << "StringDetect candidate for DetectChar:" << str;
+                return false;
+            }
+            else if (str.size() == 2) {
+                qWarning() << hlFilename << "line" << xml.lineNumber() << "StringDetect candidate for Detect2Chars:" << str;
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+//! Proposes to replace AnyChar with DetectChar.
+//! Returns false when a suggestion was printed, true otherwise.
+bool suggestForAnyChar(const QString &hlFilename, QXmlStreamReader &xml)
+{
+    if (xml.name() == QLatin1String("AnyChar")) {
+        const auto string = xml.attributes().value(QLatin1String("String")).toString();
+        if (string.size() == 1) {
+            qWarning() << hlFilename << "line" << xml.lineNumber() << "AnyChar candidate for DetectChar:" << string;
+            return false;
+        }
+    }
+
+    return true;
+}
+
+//! Pairs a regular expression with the element name to suggest when a RegExpr's String matches it.
+struct SuggestPattern
+{
+    enum Flags
+    {
+        NoFlag,
+        HasInsensitiveAttribute = 1, //!< may still be suggested when the RegExpr is insensitive
+        HasDynamicAttribute = 2, //!< may still be suggested when a dynamic RegExpr contains %1 or %2
+        OnlyIfColumn0 = 4, //!< only suggested when the RegExpr has column="0"
+    };
+
+    QLatin1String suggest;
+    QRegularExpression regex;
+    Flags flags;
+
+    SuggestPattern(const char *suggest, const QString &stringRegex, Flags flags)
+        : suggest(suggest)
+        , regex(stringRegex)
+        , flags(flags)
+    {
+        Q_ASSERT(regex.isValid());
+    }
+};
+
+//! Combines two SuggestPattern flag values into one.
+SuggestPattern::Flags operator|(const SuggestPattern::Flags &a, const SuggestPattern::Flags &b)
+{
+    return static_cast<SuggestPattern::Flags>(int(a) | int(b));
+}
+
+#define EscapedChar R"(\\[0-9]{2,3}|\\x[0-9a-fA-F]{2}|\\[^bBwWsSdD])"
+#define SingleChar "(?:" \
+    EscapedChar \
+"|" "\\[(?:[^^\\\\]|" EscapedChar ")\\]" \
+"|" "[^^\\\\$.+*?|{[]" \
+")"
+// ( `C` | `(C)` | `(?:C)` ) (`{1}` | `{1,1}`)?
+#define CharReg "(?:(?:" SingleChar R"(|\((\?:)?)" SingleChar R"(\)(?:\{1\}|\{1,1\})?)))"
+//! Patterns tried in order by suggestForRegExp(); the first one that matches is reported.
+const SuggestPattern suggestPatterns[]{
+    {"DetectChar", QStringLiteral("^" CharReg "(?:{1(?:,1)?\\})?$"), SuggestPattern::HasInsensitiveAttribute},
+    {"Detect2Chars", QStringLiteral("^" CharReg CharReg "(?:\\{1(?:,1)?\\})?$|^" CharReg "(?:{2(?:,2)?\\})$"), SuggestPattern::NoFlag},
+    {"AnyChar", QStringLiteral(R"(^\^?\[(?:\\\]|[^\]^])(?:\\\]|[^\]])*\](?:\|\[(?:\\\]|[^\]^])(?:\\\]|[^\]])*\])*\$?$)"), SuggestPattern::HasInsensitiveAttribute},
+    // (`C` | `[aA]`)* for "[sS][tT][rR]"
+    {"StringDetect", QStringLiteral("^(?:" CharReg "|\\[[a-zA-Z]{2}\\])*$"), SuggestPattern::HasInsensitiveAttribute | SuggestPattern::HasDynamicAttribute},
+    {"WordDetect", QStringLiteral(R"(^\\b)" CharReg R"(*\\b$)"), SuggestPattern::HasInsensitiveAttribute},
+    {"WordDetect", QStringLiteral("^" CharReg R"(*\\b$)"), SuggestPattern::HasInsensitiveAttribute | SuggestPattern::OnlyIfColumn0},
+    {"DetectIdentifier", QStringLiteral(R"(^\[(?:_|a-z){2}\]\[(?:(?:0-9|a-z|_){3}|\\w)\]\*$)"), SuggestPattern::NoFlag},
+    {"DetectIdentifier", QStringLiteral(R"(^\[(?:_|a-z|A-Z){3}\]\[(?:(?:0-9|a-z|A-Z|_){4}|\\w)\]\*$)"), SuggestPattern::HasInsensitiveAttribute},
+    {"DetectSpace", QStringLiteral(R"(^\\s\*?\$?$)"), SuggestPattern::HasInsensitiveAttribute},
+    {"LineContinue", QStringLiteral("^" CharReg "\\$$"), SuggestPattern::NoFlag},
+    {"HlCOct", QStringLiteral(R"(^0(?:\[0-7\]|\[01234567\])[+*]$)"), SuggestPattern::HasInsensitiveAttribute},
+    {"HlCHex", QStringLiteral(R"(^0x\[(?:0-9|a-f|A-F){3}\][*+]$)"), SuggestPattern::NoFlag},
+    {"HlCHex", QStringLiteral(R"(^0x\[(?:0-9|a-f|A-F){2,3}\][*+]$)"), SuggestPattern::HasInsensitiveAttribute},
+    {"HlCStringChar", QStringLiteral(R"(^\\\[[abefnrtv"'?\\]{11,}\]$)"), SuggestPattern::NoFlag},
+    {"HlCChar", QStringLiteral(R"(^'?(?:\((?:\?:)?)?(?:(?:)"
+        R"('?\[\^\\\\\]'?|)"
+        R"('?\\\\\[[abefnrtv"?\\']{12,}\]'?|)"
+        R"('?\\\\x\[(?:0-9|a-f|(?:A-F)?)+\](?:\{2(?:,2)?\})?'?|)"
+        R"('?\\\\0(?:\[0-7\]|\[01234567\])+(?:\{2(?:,2)?\})?'?)"
+        R"()\|?){3,}\)?'?$)"), SuggestPattern::NoFlag},
+    {"Int", QStringLiteral(R"(^(?:\[0-9\]|\\d)(?:\+|(?:(?:\[0-9\]|\\d)\*))$)"), SuggestPattern::NoFlag},
+    {"RangeDetect", QStringLiteral("^(" CharReg R"()\[\^\1\]\*\1$)"), SuggestPattern::NoFlag},
+};
+#undef CharReg
+#undef SingleChar
+#undef EscapedChar
+
+//! Proposes to replace RegExpr with another element (HlCHex, HlCStringChar, RangeDetect, etc).
+//! Returns false when a suggestion was printed, true otherwise.
+bool suggestForRegExp(const QString &hlFilename, QXmlStreamReader &xml)
+{
+    if (xml.name() == QLatin1String("RegExpr")) {
+        auto string = xml.attributes().value(QLatin1String("String")).toString();
+        if (xml.attributes().hasAttribute(QLatin1String("lookAhead"))) {
+            const auto lookAhead = xml.attributes().value(QLatin1String("lookAhead"));
+            if (string == QLatin1String("^$") || (Xml::attrToBool(lookAhead) && string == QLatin1String("."))) {
+                const auto context = xml.attributes().value(QLatin1String("context"));
+                qWarning() << hlFilename << "line" << xml.lineNumber()
+                    << QStringLiteral("RegExpr candidate for fallthroughContext=\"") + context + QStringLiteral("\" fallthrough=\"true\"");
+                return false;
+            }
+        }
+
+        const QString originalString = string;
+        const bool isInsensitive = Xml::attrToBool(xml.attributes().value(QLatin1String("insensitive")));
+        const bool isDynamic = Xml::attrToBool(xml.attributes().value(QLatin1String("dynamic")));
+        // a leading "^\s*" and then a leading "^" are reported in the message, so strip them before matching
+        const bool asFirstNonSpace = string.startsWith(QLatin1String("^\\s*"));
+        if (asFirstNonSpace) {
+            string.remove(0, 4);
+        }
+        const bool asColumn0 = string.startsWith(QLatin1String("^"));
+        if (asColumn0) {
+            string.remove(0, 1);
+        }
+        const auto extraMessage = (asFirstNonSpace ? "with firstNonSpace=\"true\":" : (asColumn0 ? "with column=\"0\":" : ":"));
+        for (const auto &suggestPattern : suggestPatterns) {
+            if (isInsensitive && !bool(suggestPattern.flags & SuggestPattern::HasInsensitiveAttribute)) {
+                continue;
+            }
+            if (bool(suggestPattern.flags & SuggestPattern::OnlyIfColumn0) && xml.attributes().value(QLatin1String("column")) != QLatin1String("0")) {
+                continue;
+            }
+            if (!bool(suggestPattern.flags & SuggestPattern::HasDynamicAttribute) && isDynamic) {
+                if (string.contains(QLatin1String("%1")) || string.contains(QLatin1String("%2"))) {
+                    continue;
+                }
+            }
+            if (suggestPattern.regex.match(string).hasMatch()) {
+                qWarning() << hlFilename << "line" << xml.lineNumber() << "RegExpr candidate for" << suggestPattern.suggest << extraMessage << originalString;
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Helper class that suggests rule mergers.
+ *
+ * It remembers the last rule seen and reports the current rule when both agree on everything
+ * except String, char and char1, and either canBeMerged() accepts the pair or they are identical.
+ */
+class RuleMergeSuggest
+{
+public:
+    //! Returns false when a merge suggestion was printed, true otherwise.
+    bool suggest(const QString &hlFilename, QXmlStreamReader &xml)
+    {
+        if (xml.attributes().hasAttribute(QLatin1String("attribute")) && xml.name() != QLatin1String("context")) {
+            const std::size_t specificAttributeLength = 3;
+            const QStringRef values[] {
+                xml.attributes().value(QLatin1String("String")),
+                xml.attributes().value(QLatin1String("char")),
+                xml.attributes().value(QLatin1String("char1")),
+                xml.name(),
+                xml.attributes().value(QLatin1String("attribute")),
+                xml.attributes().value(QLatin1String("context")),
+                xml.attributes().value(QLatin1String("beginRegion")),
+                xml.attributes().value(QLatin1String("endRegion")),
+                Xml::attrToBool(xml.attributes().value(QLatin1String("lookAhead"))) ? QStringRef(&m_true) : QStringRef(&m_false),
+                Xml::attrToBool(xml.attributes().value(QLatin1String("firstNonSpace"))) ? QStringRef(&m_true) : QStringRef(&m_false),
+                xml.attributes().value(QLatin1String("column")),
+            };
+            // both tables are walked in lockstep below, so they must have the same number of elements
+            static_assert(std::extent<decltype(values)>::value == std::extent<decltype(m_previousRule)>::value, "values and m_previousRule must have the same size");
+
+            if (std::equal(std::begin(values) + specificAttributeLength, std::end(values), std::begin(m_previousRule) + specificAttributeLength)
+                && (canBeMerged(m_previousRule, values)
+                    || canBeMerged(values, m_previousRule)
+                    || std::equal(std::begin(values), std::end(values) - specificAttributeLength, std::begin(m_previousRule))
+                )) {
+                qWarning() << hlFilename << "line" << xml.lineNumber() << xml.name() << "can be merged with the previous rule";
+                return false;
+            }
+
+            std::size_t i = 0;
+            for (auto& stringRef : values) {
+                m_previousRule[i] = stringRef.toString();
+                ++i;
+            }
+        }
+        else {
+            m_previousRule[3].clear(); // drop the remembered element name
+        }
+
+        return true;
+    }
+
+private:
+    //! Returns true when array1 is a RegExpr, or when array1 is a DetectChar and array2 is an AnyChar
+    //! or a RangeDetect whose char..char1 interval contains or directly touches array1's char.
+    template<class Array1, class Array2>
+    bool canBeMerged(const Array1 &array1, const Array2 &array2) {
+        if (array1[3] == QLatin1String("RegExpr")) {
+            return true;
+        }
+        if (array1[3] == QLatin1String("DetectChar")) {
+            if (array2[3] == QLatin1String("AnyChar")) {
+                return true;
+            }
+            if (array2[3] == QLatin1String("RangeDetect")) {
+                // char/char1 must be present before their first character is read
+                if (array1[1].isEmpty() || array2[1].isEmpty() || array2[2].isEmpty()) {
+                    return false;
+                }
+                // compare code points: digitValue() is -1 for every non-digit and made all of them "mergeable"
+                const int cleft = array2[1].at(0).unicode();
+                const int cright = array2[2].at(0).unicode();
+                const int c = array1[1].at(0).unicode();
+                return ((cleft <= c && c <= cright) || (c + 1 == cleft) || (cright + 1 == c));
+            }
+        }
+        return false;
+    }
+
+    QString m_previousRule[11];
+    QString m_true = QStringLiteral("1");
+    QString m_false = QStringLiteral("0");
+};
+
 }
 
 int main(int argc, char *argv[])
@@ -456,14 +713,14 @@
         hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
 
         // add boolean one
-        const QString hidden = xml.attributes().value(QLatin1String("hidden")).toString();
-        hl[QStringLiteral("hidden")] = (hidden == QLatin1String("true") || hidden == QLatin1String("1"));
+        hl[QStringLiteral("hidden")] = Xml::attrToBool(xml.attributes().value(QLatin1String("hidden")));
 
         // remember hl
         hls[QFileInfo(hlFile).fileName()] = hl;
 
         AttributeChecker attributeChecker(hlFilename);
         KeywordChecker keywordChecker(hlFilename);
+        RuleMergeSuggest ruleMergeSuggest;
         const QString hlName = hl[QStringLiteral("name")].toString();
 
         // scan for broken regex or keywords with spaces
@@ -505,6 +762,30 @@
                 anyError = 7;
                 continue;
             }
+
+            // suggestions for StringDetect
+            if (!suggestForStringDetect(hlFilename, xml)) {
+                //anyError = 12;
+                continue;
+            }
+
+            // suggestions for AnyChar
+            if (!suggestForAnyChar(hlFilename, xml)) {
+                //anyError = 12;
+                continue;
+            }
+
+            // suggestions for RegExp
+            if (!suggestForRegExp(hlFilename, xml)) {
+                //anyError = 12;
+                continue;
+            }
+
+            // suggestions for a rule merge
+            if (!ruleMergeSuggest.suggest(hlFilename, xml)) {
+                //anyError = 12;
+                continue;
+            }
         }
 
         if (!attributeChecker.check()) {