diff --git a/autotests/src/regexpsearch_test.cpp b/autotests/src/regexpsearch_test.cpp --- a/autotests/src/regexpsearch_test.cpp +++ b/autotests/src/regexpsearch_test.cpp @@ -24,6 +24,8 @@ #include #include +#include + #include QTEST_MAIN(RegExpSearchTest) @@ -228,11 +230,11 @@ testNewRow() << "fe$" << Range(0, 0, 0, 8) << false << Range(0, 6, 0, 8); testNewRow() << "fe$" << Range(0, 7, 0, 8) << false << Range::invalid(); testNewRow() << "fe$" << Range(0, 6, 0, 8) << false << Range(0, 6, 0, 8); - // testNewRow() << "fe$" << Range(0, 0, 0, 5) << false << Range::invalid(); // only match at line end, fails + testNewRow() << "fe$" << Range(0, 0, 0, 5) << false << Range::invalid(); // only match at line end, fails testNewRow() << "fe$" << Range(0, 0, 0, 8) << true << Range(0, 6, 0, 8); testNewRow() << "fe$" << Range(0, 7, 0, 8) << true << Range::invalid(); testNewRow() << "fe$" << Range(0, 6, 0, 8) << true << Range(0, 6, 0, 8); - // testNewRow() << "fe$" << Range(0, 0, 0, 5) << true << Range::invalid(); // fails due to $-shortcoming in QRegExp + testNewRow() << "fe$" << Range(0, 0, 0, 5) << true << Range::invalid(); testNewRow() << "^fe fe fe$" << Range(0, 0, 0, 8) << false << Range(0, 0, 0, 8); testNewRow() << "^fe fe fe$" << Range(0, 3, 0, 8) << false << Range::invalid(); @@ -249,19 +251,22 @@ testNewRow() << "fe( fe)*" << Range(0, 0, 0, 8) << false << Range(0, 0, 0, 8); testNewRow() << "^fe( fe)*$" << Range(0, 3, 0, 8) << false << Range::invalid(); testNewRow() << "fe( fe)*$" << Range(0, 3, 0, 8) << false << Range(0, 3, 0, 8); - // testNewRow() << "^fe( fe)*$" << Range(0, 0, 0, 5) << false << Range::invalid(); // fails due to $-shortcoming in QRegExp - testNewRow() << "^fe( fe)*" << Range(0, 0, 0, 5) << false << Range(0, 0, 0, 5); + testNewRow() << "^fe( fe)*$" << Range(0, 0, 0, 5) << false << Range::invalid(); + // fails because the whole line is fed to QRegularExpression, then matches + // that end beyond the search range are rejected, see 
KateRegExpSearch::searchText() + // testNewRow() << "^fe( fe)*" << Range(0, 0, 0, 5) << false << Range(0, 0, 0, 5); + testNewRow() << "^fe( fe)*$" << Range(0, 0, 0, 8) << true << Range(0, 0, 0, 8); testNewRow() << "^fe( fe)*" << Range(0, 0, 0, 8) << true << Range(0, 0, 0, 8); - // testNewRow() << "fe( fe)*$" << Range(0, 0, 0, 8) << true << Range(0, 0, 0, 8); // fails, shouldn't matching be greedy? - // testNewRow() << "fe( fe)*" << Range(0, 0, 0, 8) << true << Range(0, 0, 0, 8); // fails, shouldn't matching be greedy? + testNewRow() << "fe( fe)*$" << Range(0, 0, 0, 8) << true << Range(0, 0, 0, 8); + testNewRow() << "fe( fe)*" << Range(0, 0, 0, 8) << true << Range(0, 0, 0, 8); testNewRow() << "^fe( fe)*$" << Range(0, 3, 0, 8) << true << Range::invalid(); - // testNewRow() << "fe( fe)*$" << Range(0, 3, 0, 8) << true << Range(0, 3, 0, 8); // fails, shouldn't matching be greedy? - // testNewRow() << "^fe( fe)*$" << Range(0, 0, 0, 5) << true << Range::invalid(); // fails due to $-shortcoming in QRegExp + testNewRow() << "fe( fe)*$" << Range(0, 3, 0, 8) << true << Range(0, 3, 0, 8); + testNewRow() << "^fe( fe)*$" << Range(0, 0, 0, 5) << true << Range::invalid(); testNewRow() << "^fe|fe$" << Range(0, 0, 0, 5) << false << Range(0, 0, 0, 2); testNewRow() << "^fe|fe$" << Range(0, 3, 0, 8) << false << Range(0, 6, 0, 8); - // testNewRow() << "^fe|fe$" << Range(0, 0, 0, 5) << true << Range(0, 0, 0, 2); // fails due to $-shortcoming in QRegExp + testNewRow() << "^fe|fe$" << Range(0, 0, 0, 5) << true << Range(0, 0, 0, 2); testNewRow() << "^fe|fe$" << Range(0, 3, 0, 8) << true << Range(0, 6, 0, 8); } @@ -275,15 +280,15 @@ KTextEditor::DocumentPrivate doc; doc.setText("fe fe fe"); - KateRegExpSearch searcher(&doc, Qt::CaseInsensitive); + KateRegExpSearch searcher(&doc); static int i = 0; if (i == 34 || i == 36) { qDebug() << i; } i++; - const Range result = searcher.search(pattern, inputRange, backwards)[0]; + const Range result = searcher.search(pattern, inputRange, backwards, 
QRegularExpression::CaseInsensitiveOption)[0]; QCOMPARE(result, expected); } @@ -293,8 +298,8 @@ KTextEditor::DocumentPrivate doc; doc.setText(" \\piinfercong"); - KateRegExpSearch search(&doc, Qt::CaseSensitive); - const Range result = search.search("\\\\piinfer(\\w)", Range(0, 2, 0, 15))[0]; + KateRegExpSearch searcher(&doc); + const Range result = searcher.search("\\\\piinfer(\\w)", Range(0, 2, 0, 15), false)[0]; QCOMPARE(result, Range(0, 2, 0, 11)); } @@ -304,8 +309,8 @@ KTextEditor::DocumentPrivate doc; doc.setText("foobar foo bar foo bar foo"); - KateRegExpSearch search(&doc, Qt::CaseSensitive); - const Range result = search.search("foo", Range(0, 0, 0, 15), true)[0]; + KateRegExpSearch searcher(&doc); + const Range result = searcher.search("foo", Range(0, 0, 0, 15), true)[0]; QCOMPARE(result, Range(0, 7, 0, 10)); } @@ -315,8 +320,8 @@ KTextEditor::DocumentPrivate doc; doc.setText("\\newcommand{\\piReductionOut}"); - KateRegExpSearch search(&doc, Qt::CaseSensitive); - const QVector result = search.search("\\\\piReduction(\\S)", Range(0, 10, 0, 28), true); + KateRegExpSearch searcher(&doc); + const QVector result = searcher.search("\\\\piReduction(\\S)", Range(0, 10, 0, 28), true); QCOMPARE(result.size(), 2); QCOMPARE(result[0], Range(0, 12, 0, 25)); diff --git a/autotests/src/searchbar_test.cpp b/autotests/src/searchbar_test.cpp --- a/autotests/src/searchbar_test.cpp +++ b/autotests/src/searchbar_test.cpp @@ -513,12 +513,10 @@ KateSearchBar bar(true, &view, &config); - bar.setSearchPattern("$"); + bar.setSearchPattern(QStringLiteral("$")); bar.setSearchMode(KateSearchBar::MODE_REGEX); bar.setReplacementPattern("D"); - bar.replaceAll(); - QCOMPARE(doc.text(), QString("aaaD\nbbbD\ncccD\nD\nD\naaaD\nbbbD\ncccD\ndddD\n")); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -131,7 +131,6 @@ render/katelinelayout.cpp # search stuff -search/kateregexp.cpp search/kateplaintextsearch.cpp 
search/kateregexpsearch.cpp search/katematch.cpp diff --git a/src/document/katedocument.cpp b/src/document/katedocument.cpp --- a/src/document/katedocument.cpp +++ b/src/document/katedocument.cpp @@ -36,7 +36,6 @@ #include "katemodemanager.h" #include "katepartdebug.h" #include "kateplaintextsearch.h" -#include "kateregexp.h" #include "kateregexpsearch.h" #include "katerenderer.h" #include "kateschema.h" @@ -1772,8 +1771,10 @@ if (regexMode) { // regexp search // escape sequences are supported by definition - KateRegExpSearch searcher(this, caseSensitivity); - return searcher.search(pattern, range, backwards); + QRegularExpression::PatternOptions patternOptions; + patternOptions |= (caseSensitivity == Qt::CaseInsensitive) ? QRegularExpression::CaseInsensitiveOption : QRegularExpression::NoPatternOption; + KateRegExpSearch searcher(this); + return searcher.search(pattern, range, backwards, patternOptions); } if (escapeSequences) { diff --git a/src/search/kateplaintextsearch.cpp b/src/search/kateplaintextsearch.cpp --- a/src/search/kateplaintextsearch.cpp +++ b/src/search/kateplaintextsearch.cpp @@ -57,7 +57,10 @@ // escape dot and friends const QString workPattern = QStringLiteral("\\b%1\\b").arg(QRegularExpression::escape(text)); - return KateRegExpSearch(m_document, m_caseSensitivity).search(workPattern, inputRange, backwards).at(0); + QRegularExpression::PatternOptions options; + options |= m_caseSensitivity == Qt::CaseInsensitive ? 
QRegularExpression::CaseInsensitiveOption : QRegularExpression::NoPatternOption; + + return KateRegExpSearch(m_document).search(workPattern, inputRange, backwards, options).at(0); } if (text.isEmpty() || !inputRange.isValid() || (inputRange.start() == inputRange.end())) { diff --git a/src/search/kateregexp.h b/src/search/kateregexp.h deleted file mode 100644 --- a/src/search/kateregexp.h +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.0-or-later - - Copyright (C) 2009 Bernhard Beschow - Copyright (C) 2007 Sebastian Pipping - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. 
-*/ - -#ifndef _KATE_REGEXP_H_ -#define _KATE_REGEXP_H_ - -#include - -class KateRegExp -{ -public: - explicit KateRegExp(const QString &pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive, QRegExp::PatternSyntax syntax = QRegExp::RegExp2); - - bool isEmpty() const - { - return m_regExp.isEmpty(); - } - bool isValid() const - { - return m_regExp.isValid(); - } - QString pattern() const - { - return m_regExp.pattern(); - } - int numCaptures() const - { - return m_regExp.captureCount(); - } - int pos(int nth = 0) const - { - return m_regExp.pos(nth); - } - QString cap(int nth = 0) const - { - return m_regExp.cap(nth); - } - int matchedLength() const - { - return m_regExp.matchedLength(); - } - - int indexIn(const QString &str, int offset, int end) const; - - /** - * This function is a replacement for QRegExp.lastIndexIn that - * returns the last match that would have been found when - * searching forwards, which QRegExp.lastIndexIn does not. - * We need this behavior to allow the user to jump back to - * the last match. - * - * \param str Text to search in - * \param offset Offset (-1 starts from end, -2 from one before the end) - * \return Index of match or -1 if no match is found - */ - int lastIndexIn(const QString &str, int offset, int end) const; - - /** - * Repairs a regular Expression pattern. - * This is a workaround to make "." and "\s" not match - * newlines, which currently is the unconfigurable - * default in QRegExp. - * - * \param stillMultiLine Multi-line after reparation flag - * \return Number of replacements done - */ - int repairPattern(bool &stillMultiLine); - - /** - * States, whether the pattern matches multiple lines, - * even if it was repaired using @p repairPattern(). 
- * - * \return Whether the pattern matches multiple lines - */ - bool isMultiLine() const; - -private: - QRegExp m_regExp; -}; - -#endif // KATEREGEXP_H diff --git a/src/search/kateregexp.cpp b/src/search/kateregexp.cpp deleted file mode 100644 --- a/src/search/kateregexp.cpp +++ /dev/null @@ -1,295 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.0-or-later - - Copyright (C) 2009 Bernhard Beschow - Copyright (C) 2007 Sebastian Pipping - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. -*/ - -#include "kateregexp.h" - -KateRegExp::KateRegExp(const QString &pattern, Qt::CaseSensitivity cs, QRegExp::PatternSyntax syntax) - : m_regExp(pattern, cs, syntax) -{ -} - -// these things can besides '.' and '\s' make pattern multi-line: -// \n, \x000A, \x????-\x????, \0012, \0???-\0??? 
-// a multi-line pattern must not pass as single-line, the other -// way around will just result in slower searches and is therefore -// not as critical -int KateRegExp::repairPattern(bool &stillMultiLine) -{ - const QString &text = pattern(); // read-only input for parsing - - // get input - const int inputLen = text.length(); - int input = 0; // walker index - - // prepare output - QString output; - output.reserve(2 * inputLen + 1); // twice should be enough for the average case - - // parser state - stillMultiLine = false; - int replaceCount = 0; - bool insideClass = false; - - while (input < inputLen) { - if (insideClass) { - // wait for closing, unescaped ']' - switch (text[input].unicode()) { - case L'\\': - switch (text[input + 1].unicode()) { - case L'x': - if (input + 5 < inputLen) { - // copy "\x????" unmodified - output.append(text.midRef(input, 6)); - input += 6; - } else { - // copy "\x" unmodified - output.append(text.midRef(input, 2)); - input += 2; - } - stillMultiLine = true; - break; - - case L'0': - if (input + 4 < inputLen) { - // copy "\0???" unmodified - output.append(text.midRef(input, 5)); - input += 5; - } else { - // copy "\0" unmodified - output.append(text.midRef(input, 2)); - input += 2; - } - stillMultiLine = true; - break; - - case L's': - // replace "\s" with "[ \t]" - output.append(QLatin1String(" \\t")); - input += 2; - replaceCount++; - break; - - case L'n': - stillMultiLine = true; - // FALLTROUGH - Q_FALLTHROUGH(); - default: - // copy "\?" unmodified - output.append(text.midRef(input, 2)); - input += 2; - } - break; - - case L']': - // copy "]" unmodified - insideClass = false; - output.append(text[input]); - input++; - break; - - default: - // copy "?" unmodified - output.append(text[input]); - input++; - } - } else { - // search for real dots and \S - switch (text[input].unicode()) { - case L'\\': - switch (text[input + 1].unicode()) { - case L'x': - if (input + 5 < inputLen) { - // copy "\x????" 
unmodified - output.append(text.midRef(input, 6)); - input += 6; - } else { - // copy "\x" unmodified - output.append(text.midRef(input, 2)); - input += 2; - } - stillMultiLine = true; - break; - - case L'0': - if (input + 4 < inputLen) { - // copy "\0???" unmodified - output.append(text.midRef(input, 5)); - input += 5; - } else { - // copy "\0" unmodified - output.append(text.midRef(input, 2)); - input += 2; - } - stillMultiLine = true; - break; - - case L's': - // replace "\s" with "[ \t]" - output.append(QLatin1String("[ \\t]")); - input += 2; - replaceCount++; - break; - - case L'n': - stillMultiLine = true; - // FALLTROUGH - Q_FALLTHROUGH(); - default: - // copy "\?" unmodified - output.append(text.midRef(input, 2)); - input += 2; - } - break; - - case L'.': - // replace " with "[^\n]" - output.append(QLatin1String("[^\\n]")); - input++; - replaceCount++; - break; - - case L'[': - // copy "]" unmodified - insideClass = true; - output.append(text[input]); - input++; - break; - - default: - // copy "?" unmodified - output.append(text[input]); - input++; - } - } - } - - // Overwrite with repaired pattern - m_regExp.setPattern(output); - return replaceCount; -} - -bool KateRegExp::isMultiLine() const -{ - const QString &text = pattern(); - - // parser state - bool insideClass = false; - - for (int input = 0; input < text.length(); /*empty*/) { - if (insideClass) { - // wait for closing, unescaped ']' - switch (text[input].unicode()) { - case L'\\': - switch (text[input + 1].unicode()) { - case L'x': - return true; - - case L'0': - return true; - - case L's': - // replace "\s" with "[ \t]" - input += 2; - break; - - case L'n': - return true; - // FALLTROUGH - - default: - // copy "\?" unmodified - input += 2; - } - break; - - case L']': - // copy "]" unmodified - insideClass = false; - input++; - break; - - default: - // copy "?" 
unmodified - input++; - } - } else { - // search for real dots and \S - switch (text[input].unicode()) { - case L'\\': - switch (text[input + 1].unicode()) { - case L'x': - return true; - - case L'0': - return true; - - case L's': - // replace "\s" with "[ \t]" - input += 2; - break; - - case L'n': - return true; - - default: - // copy "\?" unmodified - input += 2; - } - break; - - case L'.': - // replace " with "[^\n]" - input++; - break; - - case L'[': - // copy "]" unmodified - insideClass = true; - input++; - break; - - default: - // copy "?" unmodified - input++; - } - } - } - - return false; -} - -int KateRegExp::indexIn(const QString &str, int start, int end) const -{ - return m_regExp.indexIn(str.left(end), start, QRegExp::CaretAtZero); -} - -int KateRegExp::lastIndexIn(const QString &str, int start, int end) const -{ - const int index = m_regExp.lastIndexIn(str.mid(start, end - start), -1, QRegExp::CaretAtZero); - - if (index == -1) { - return -1; - } - - const int index2 = m_regExp.indexIn(str.left(end), start + index, QRegExp::CaretAtZero); - - return index2; -} diff --git a/src/search/kateregexpsearch.h b/src/search/kateregexpsearch.h --- a/src/search/kateregexpsearch.h +++ b/src/search/kateregexpsearch.h @@ -23,6 +23,7 @@ #define _KATE_REGEXPSEARCH_H_ #include +#include #include @@ -41,7 +42,7 @@ class KTEXTEDITOR_EXPORT KateRegExpSearch { public: - explicit KateRegExpSearch(const KTextEditor::Document *document, Qt::CaseSensitivity caseSensitivity); + explicit KateRegExpSearch(const KTextEditor::Document *document); ~KateRegExpSearch(); // @@ -51,17 +52,21 @@ /** * Search for the regular expression \p pattern inside the range * \p inputRange. If \p backwards is \e true, the search direction will - * be reversed. + * be reversed. \p options is a set of QRegularExpression::PatternOptions + * OR flags that control certain aspects of the search, e.g. case + * sensitivity and if the dot "." metacharacter matches any character + * including a newline. 
* * \param pattern text to search for * \param inputRange Range to search in * \param backwards if \e true, the search will be backwards + * \param options QRegularExpression pattern options * \return Vector of ranges, one for each capture. The first range (index zero) - * spans the full match. If the pattern does not match the vector - * has length 1 and holds the invalid range (see Range::isValid()). + * spans the full match. If the pattern does not match, the vector + * will contain one element, an invalid range (see Range::isValid()). * \see KTextEditor::Range, QRegularExpression */ - QVector search(const QString &pattern, const KTextEditor::Range &inputRange, bool backwards = false); + QVector search(const QString &pattern, const KTextEditor::Range &inputRange, bool backwards = false, QRegularExpression::PatternOptions options = QRegularExpression::NoPatternOption); /** * Returns a modified version of text where escape sequences are resolved, e.g. "\\n" to "\n". @@ -96,9 +101,20 @@ */ static QString buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies); + /** + * Checks the search pattern for escape sequences that can make a match span multiple lines, and + * in that case sets \p stillMultiLine to true. + * + * "\s" is treated specially so that it doesn't match new line characters; this is achieved by + * replacing any occurrences of "\s" with "[ \t]" in the search \p pattern.
+ * \param pattern the regular expression search pattern + * \param stillMultiLine is \c true if the search pattern can still match multiple lines even + * after replacing "\s" with "[ \t]"; otherwise it's false + */ + QString repairPattern(const QString &pattern, bool &stillMultiLine); + private: const KTextEditor::Document *const m_document; - Qt::CaseSensitivity m_caseSensitivity; class ReplacementStream; }; diff --git a/src/search/kateregexpsearch.cpp b/src/search/kateregexpsearch.cpp --- a/src/search/kateregexpsearch.cpp +++ b/src/search/kateregexpsearch.cpp @@ -21,7 +21,6 @@ // BEGIN includes #include "kateregexpsearch.h" -#include "kateregexp.h" #include // END includes @@ -161,9 +160,8 @@ // // KateSearch Constructor // -KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document, Qt::CaseSensitivity caseSensitivity) +KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document) : m_document(document) - , m_caseSensitivity(caseSensitivity) { } @@ -191,266 +189,297 @@ int closeIndex; }; -QVector KateRegExpSearch::search(const QString &pattern, const KTextEditor::Range &inputRange, bool backwards) +QVector KateRegExpSearch::search(const QString &pattern, const KTextEditor::Range &inputRange, bool backwards, QRegularExpression::PatternOptions options) { - // regex search - KateRegExp regexp(pattern, m_caseSensitivity); - - if (regexp.isEmpty() || !regexp.isValid() || !inputRange.isValid() || (inputRange.start() == inputRange.end())) { - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; + // Enable multiline mode: + // - "^" will match at the beginning of the subject string and will + // also match right after a newline character (unless the newline + // is the last character in the searched range) + // - "$" will match at the end of the subject string and will also match + // right before a newline character + // + // This is necessary because of the way Ranges are passed to QRegularExpression, + // from
its POV, the end of the range is the end of the string it's + // matching against so it would match "$" at the end of the range _but_ + // "$" must not match in the middle of a document line, that's why here + // QRegularExpression is fed whole lines, regardless of where the range + // ends, then matches that aren't contained in the searched range are rejected. + options |= QRegularExpression::MultilineOption; + + QRegularExpression regexp; + + bool stillMultiLine; + const QString repairedPattern = repairPattern(pattern, stillMultiLine); + + regexp.setPattern(repairedPattern); + regexp.setPatternOptions(options); + + // returned if no matches are found + QVector noResult(1, KTextEditor::Range::invalid()); + + if (pattern.isEmpty() || !regexp.isValid() || !inputRange.isValid() || inputRange.isEmpty()) { + return noResult; } - // detect pattern type (single- or mutli-line) - bool isMultiLine; + const int rangeStartLine = inputRange.start().line(); + const int rangeStartCol = inputRange.start().column(); - // detect '.' and '\s' and fix them - const bool dotMatchesNewline = false; // TODO - const int replacements = regexp.repairPattern(isMultiLine); - if (dotMatchesNewline && (replacements > 0)) { - isMultiLine = true; - } + const int rangeEndLine = inputRange.end().line(); + const int rangeEndCol = inputRange.end().column(); - const int firstLineIndex = inputRange.start().line(); - const int minColStart = inputRange.start().column(); - // const int maxColEnd = inputRange.end().column(); - if (isMultiLine) { - // multi-line regex search (both forward and backward mode) - QString wholeDocument; - const int inputLineCount = inputRange.end().line() - inputRange.start().line() + 1; - FAST_DEBUG("multi line search (lines " << firstLineIndex << ".." << firstLineIndex + inputLineCount - 1 << ")"); + if (stillMultiLine) { + const int inputLineCount = rangeEndLine - rangeStartLine + 1; + FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." 
<< rangeEndLine << ")"); // nothing to do... - if (firstLineIndex >= m_document->lines()) { - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; + if (rangeStartLine >= m_document->lines()) { + return noResult; } QVector lineLens(inputLineCount); + int maxMatchOffset = 0; + + // all lines in the input range + QString wholeRange; + const QString sep = QLatin1String("\n"); + for (int i = 0; i < inputLineCount; ++i) { + const int docLineIndex = rangeStartLine + i; + if (docLineIndex < 0 || m_document->lines() <= docLineIndex) { + return noResult; + } - // first line - if (firstLineIndex < 0 || m_document->lines() <= firstLineIndex) { - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; + const QString textLine = m_document->line(docLineIndex); + lineLens[i] = textLine.length(); + wholeRange.append(textLine); + if (i != rangeEndLine) { + wholeRange.append(sep); + } + + maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens[i] + 1; + + FAST_DEBUG(" line" << i << "has length" << lineLens[i]); } - const QString firstLine = m_document->line(firstLineIndex); + FAST_DEBUG("Max. 
match offset" << maxMatchOffset); - const int firstLineLen = firstLine.length() - minColStart; - wholeDocument.append(firstLine.rightRef(firstLineLen)); - lineLens[0] = firstLineLen; - FAST_DEBUG(" line" << 0 << "has length" << lineLens[0]); + QRegularExpressionMatch match; - // second line and after - for (int i = 1; i < inputLineCount; i++) { - const int lineNum = firstLineIndex + i; - if (lineNum < 0 || m_document->lines() <= lineNum) { - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; - } - const QString text = m_document->line(lineNum); + bool found = false; + QRegularExpressionMatch curMatch; - lineLens[i] = text.length(); - wholeDocument.append(QLatin1Char('\n')); - wholeDocument.append(text); - FAST_DEBUG(" line" << i << "has length" << lineLens[i]); + QRegularExpressionMatchIterator iter = regexp.globalMatch(wholeRange, rangeStartCol); + + if (backwards) { + while (iter.hasNext()) { + curMatch = iter.next(); + if (curMatch.hasMatch() && curMatch.capturedEnd() <= maxMatchOffset) { + match.swap(curMatch); + found = true; + } + } + } else { /* forwards */ + if (iter.hasNext()) { + curMatch = iter.next(); + } + if (curMatch.hasMatch() && curMatch.capturedEnd() <= maxMatchOffset) { + match.swap(curMatch); + found = true; + } } - const int pos = backwards ? regexp.lastIndexIn(wholeDocument, 0, wholeDocument.length()) : regexp.indexIn(wholeDocument, 0, wholeDocument.length()); - if (pos == -1) { + if (!found) { // no match FAST_DEBUG("not found"); - { - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; - } + return noResult; } -#ifdef FAST_DEBUG_ENABLE - const int matchLen = regexp.matchedLength(); - FAST_DEBUG("found at relative pos " << pos << ", length " << matchLen); -#endif - - // save opening and closing indices and build a map. - // the correct values will be written into it later. 
- QMap indicesToCursors; - const int numCaptures = regexp.numCaptures(); - QVector indexPairs(1 + numCaptures); - for (int z = 0; z <= numCaptures; z++) { - const int openIndex = regexp.pos(z); - IndexPair &pair = indexPairs[z]; - if (openIndex == -1) { - // empty capture gives invalid - pair.openIndex = -1; - pair.closeIndex = -1; - FAST_DEBUG("capture []"); - } else { - const int closeIndex = openIndex + regexp.cap(z).length(); - pair.openIndex = openIndex; - pair.closeIndex = closeIndex; - FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]"); - - // each key no more than once - if (!indicesToCursors.contains(openIndex)) { - TwoViewCursor *twoViewCursor = new TwoViewCursor; - twoViewCursor->index = openIndex; - indicesToCursors.insert(openIndex, twoViewCursor); - FAST_DEBUG(" border index added: " << openIndex); - } - if (!indicesToCursors.contains(closeIndex)) { - TwoViewCursor *twoViewCursor = new TwoViewCursor; - twoViewCursor->index = closeIndex; - indicesToCursors.insert(closeIndex, twoViewCursor); - FAST_DEBUG(" border index added: " << closeIndex); + if (found) { + // save opening and closing indices and build a map. + // the correct values will be written into it later. + QMap indicesToCursors; + const int numCaptures = regexp.captureCount(); + QVector indexPairs(numCaptures + 1); + for (int c = 0; c <= numCaptures; ++c) { + const int openIndex = match.capturedStart(c); + IndexPair &pair = indexPairs[c]; + if (openIndex == -1) { + // empty capture gives invalid + pair.openIndex = -1; + pair.closeIndex = -1; + FAST_DEBUG("capture []"); + } else { + const int closeIndex = match.capturedEnd(c); + pair.openIndex = openIndex; + pair.closeIndex = closeIndex; + FAST_DEBUG("capture [" << pair.openIndex << ".." 
<< pair.closeIndex << "]"); + + // each key no more than once + if (!indicesToCursors.contains(openIndex)) { + TwoViewCursor *twoViewCursor = new TwoViewCursor; + twoViewCursor->index = openIndex; + indicesToCursors.insert(openIndex, twoViewCursor); + FAST_DEBUG(" border index added: " << openIndex); + } + if (!indicesToCursors.contains(closeIndex)) { + TwoViewCursor *twoViewCursor = new TwoViewCursor; + twoViewCursor->index = closeIndex; + indicesToCursors.insert(closeIndex, twoViewCursor); + FAST_DEBUG(" border index added: " << closeIndex); + } } } - } - - // find out where they belong - int curRelLine = 0; - int curRelCol = 0; - int curRelIndex = 0; - QMap::const_iterator iter = indicesToCursors.constBegin(); - while (iter != indicesToCursors.constEnd()) { - // forward to index, save line/col - const int index = (*iter)->index; - FAST_DEBUG("resolving position" << index); - TwoViewCursor &twoViewCursor = *(*iter); - while (curRelIndex <= index) { - FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex); - const int curRelLineLen = lineLens[curRelLine]; - const int curLineRemainder = curRelLineLen - curRelCol; - const int lineFeedIndex = curRelIndex + curLineRemainder; - if (index <= lineFeedIndex) { - if (index == lineFeedIndex) { - // on this line _on_ line feed - FAST_DEBUG(" on line feed"); - const int absLine = curRelLine + firstLineIndex; - twoViewCursor.openLine = twoViewCursor.closeLine = absLine; - twoViewCursor.openCol = twoViewCursor.closeCol = ((curRelLine == 0) ? 
minColStart : 0) + curRelLineLen; + // find out where they belong + int curRelLine = 0; + int curRelCol = 0; + int curRelIndex = 0; + + auto iter = indicesToCursors.constBegin(); + while (iter != indicesToCursors.constEnd()) { + // forward to index, save line/col + const int index = (*iter)->index; + FAST_DEBUG("resolving position" << index); + TwoViewCursor &twoViewCursor = *(*iter); + while (curRelIndex <= index) { + FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex); + + const int curRelLineLen = lineLens[curRelLine]; + const int curLineRemainder = curRelLineLen - curRelCol; + const int lineFeedIndex = curRelIndex + curLineRemainder; + if (index <= lineFeedIndex) { + if (index == lineFeedIndex) { + // on this line _on_ line feed + FAST_DEBUG(" on line feed"); + const int absLine = curRelLine + rangeStartLine; + twoViewCursor.openLine = twoViewCursor.closeLine = absLine; + twoViewCursor.openCol = twoViewCursor.closeCol = curRelLineLen; + + // advance to next line + const int advance = (index - curRelIndex) + 1; + ++curRelLine; + curRelCol = 0; + curRelIndex += advance; + } else { // index < lineFeedIndex + // on this line _before_ line feed + FAST_DEBUG(" before line feed"); + const int diff = (index - curRelIndex); + const int absLine = curRelLine + rangeStartLine; + const int absCol = curRelCol + diff; + twoViewCursor.openLine = twoViewCursor.closeLine = absLine; + twoViewCursor.openCol = twoViewCursor.closeCol = absCol; + + // advance on same line + const int advance = diff + 1; + curRelCol += advance; + curRelIndex += advance; + } + FAST_DEBUG("open(" << twoViewCursor.openLine << "," << twoViewCursor.openCol << ") close(" << twoViewCursor.closeLine << "," << twoViewCursor.closeCol << ")"); + } else { // if (index > lineFeedIndex) + // not on this line // advance to next line - const int advance = (index - curRelIndex) + 1; - curRelLine++; + FAST_DEBUG(" not on this line"); 
+ ++curRelLine; curRelCol = 0; - curRelIndex += advance; - } else { // index < lineFeedIndex - // on this line _before_ line feed - FAST_DEBUG(" before line feed"); - const int diff = (index - curRelIndex); - const int absLine = curRelLine + firstLineIndex; - const int absCol = ((curRelLine == 0) ? minColStart : 0) + curRelCol + diff; - twoViewCursor.openLine = twoViewCursor.closeLine = absLine; - twoViewCursor.openCol = twoViewCursor.closeCol = absCol; - - // advance on same line - const int advance = diff + 1; - curRelCol += advance; + const int advance = curLineRemainder + 1; curRelIndex += advance; } - FAST_DEBUG("open(" << twoViewCursor.openLine << "," << twoViewCursor.openCol << ") close(" << twoViewCursor.closeLine << "," << twoViewCursor.closeCol << ")"); - } else { // if (index > lineFeedIndex) - // not on this line - // advance to next line - FAST_DEBUG(" not on this line"); - const int advance = curLineRemainder + 1; - curRelLine++; - curRelCol = 0; - curRelIndex += advance; } + ++iter; } - ++iter; - } + // build result array + QVector result(numCaptures + 1, KTextEditor::Range::invalid()); + for (int y = 0; y <= numCaptures; y++) { + IndexPair &pair = indexPairs[y]; + if (!(pair.openIndex == -1) || !(pair.closeIndex == -1)) { + const TwoViewCursor *const openCursors = indicesToCursors[pair.openIndex]; + const TwoViewCursor *const closeCursors = indicesToCursors[pair.closeIndex]; + const int startLine = openCursors->openLine; + const int startCol = openCursors->openCol; + const int endLine = closeCursors->closeLine; + const int endCol = closeCursors->closeCol; + FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")"); + result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol); + } + } - // build result array - QVector result(1 + numCaptures); - for (int y = 0; y <= numCaptures; y++) { - IndexPair &pair = indexPairs[y]; - if ((pair.openIndex == -1) || (pair.closeIndex == -1)) { - 
result[y] = KTextEditor::Range::invalid(); - } else { - const TwoViewCursor *const openCursors = indicesToCursors[pair.openIndex]; - const TwoViewCursor *const closeCursors = indicesToCursors[pair.closeIndex]; - const int startLine = openCursors->openLine; - const int startCol = openCursors->openCol; - const int endLine = closeCursors->closeLine; - const int endCol = closeCursors->closeCol; - FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")"); - result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol); + // free structs allocated for indicesToCursors + iter = indicesToCursors.constBegin(); + while (iter != indicesToCursors.constEnd()) { + TwoViewCursor *const twoViewCursor = *iter; + delete twoViewCursor; + ++iter; } - } - // free structs allocated for indicesToCursors - iter = indicesToCursors.constBegin(); - while (iter != indicesToCursors.constEnd()) { - TwoViewCursor *const twoViewCursor = *iter; - delete twoViewCursor; - ++iter; + return result; } - return result; } else { // single-line regex search (both forward of backward mode) - const int minLeft = inputRange.start().column(); - const uint maxRight = inputRange.end().column(); // first not included - const int forMin = inputRange.start().line(); - const int forMax = inputRange.end().line(); - const int forInit = backwards ? forMax : forMin; + const int rangeStartCol = inputRange.start().column(); + const uint rangeEndCol = inputRange.end().column(); + + const int rangeStartLine = inputRange.start().line(); + const int rangeEndLine = inputRange.end().line(); + + const int forInit = backwards ? rangeEndLine : rangeStartLine; + const int forInc = backwards ? -1 : +1; - FAST_DEBUG("single line " << (backwards ? forMax : forMin) << ".." << (backwards ? forMin : forMax)); - for (int j = forInit; (forMin <= j) && (j <= forMax); j += forInc) { + + FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." 
<< (backwards ? rangeStartLine : rangeEndLine)); + + for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) { if (j < 0 || m_document->lines() <= j) { FAST_DEBUG("searchText | line " << j << ": no"); - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; + return noResult; } + const QString textLine = m_document->line(j); - // Find (and don't match ^ in between...) - const int first = (j == forMin) ? minLeft : 0; - const int last = (j == forMax) ? maxRight : textLine.length(); - const int foundAt = (backwards ? regexp.lastIndexIn(textLine, first, last) : regexp.indexIn(textLine, first, last)); - const bool found = (foundAt != -1); - - /* - TODO do we still need this? - - // A special case which can only occur when searching with a regular expression consisting - // only of a lookahead (e.g. ^(?=\{) for a function beginning without selecting '{'). - if (myMatchLen == 0 && line == startPosition.line() && foundAt == (uint) col) - { - if (col < lineLength(line)) - col++; - else { - line++; - col = 0; - } - continue; + const int offset = (j == rangeStartLine) ? rangeStartCol : 0; + const int endLineMaxOffset = (j == rangeEndLine) ? 
rangeEndCol : textLine.length(); + + bool found = false; + + QRegularExpressionMatch match; + + if (backwards) { + QRegularExpressionMatchIterator iter = regexp.globalMatch(textLine, offset); + while (iter.hasNext()) { + QRegularExpressionMatch curMatch = iter.next(); + if (curMatch.hasMatch() && curMatch.capturedEnd() <= endLineMaxOffset) { + match.swap(curMatch); + found = true; + } + } + } else { + match = regexp.match(textLine, offset); + if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) { + found = true; } - */ + } if (found) { FAST_DEBUG("line " << j << ": yes"); // build result array - const int numCaptures = regexp.numCaptures(); - QVector result(1 + numCaptures); - result[0] = KTextEditor::Range(j, foundAt, j, foundAt + regexp.matchedLength()); - FAST_DEBUG("result range " << 0 << ": (" << j << ", " << foundAt << ")..(" << j << ", " << foundAt + regexp.matchedLength() << ")"); - for (int y = 1; y <= numCaptures; y++) { - const int openIndex = regexp.pos(y); + const int numCaptures = regexp.captureCount(); + QVector result(numCaptures + 1); + result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd()); + + FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart << ")..(" << j << ", " << match.capturedEnd() << ")"); + + for (int y = 1; y <= numCaptures; ++y) { + const int openIndex = match.capturedStart(y); + if (openIndex == -1) { result[y] = KTextEditor::Range::invalid(); + FAST_DEBUG("capture []"); } else { - const int closeIndex = openIndex + regexp.cap(y).length(); + const int closeIndex = match.capturedEnd(y); + FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")"); + result[y] = KTextEditor::Range(j, openIndex, j, closeIndex); } } @@ -460,10 +489,7 @@ } } } - - QVector result; - result.append(KTextEditor::Range::invalid()); - return result; + return noResult; } /*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text) @@ 
-722,6 +748,152 @@ return out.str(); } +QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine) +{ + // these characters can make a pattern multi-line: + // \n, \x000A, \x????-\x????, \0012, \0???-\0??? + // a multi-line pattern must not pass as single-line, the other + // way around will just result in slower searches and is therefore + // not as critical + + const int inputLen = pattern.length(); + + // prepare output + QString output; + output.reserve(2 * inputLen + 1); // twice should be enough for the average case + + // parser state + bool insideClass = false; + + stillMultiLine = false; + int input = 0; + while (input < inputLen) { + if (insideClass) { + // wait for closing, unescaped ']' + switch (pattern[input].unicode()) { + case L'\\': + switch (pattern[input + 1].unicode()) { + case L'x': + if (input + 5 < inputLen) { + // copy "\x????" unmodified + output.append(pattern.midRef(input, 6)); + input += 6; + } else { + // copy "\x" unmodified + output.append(pattern.midRef(input, 2)); + input += 2; + } + stillMultiLine = true; + break; + + case L'0': + if (input + 4 < inputLen) { + // copy "\0???" unmodified + output.append(pattern.midRef(input, 5)); + input += 5; + } else { + // copy "\0" unmodified + output.append(pattern.midRef(input, 2)); + input += 2; + } + stillMultiLine = true; + break; + + case L's': + // replace "\s" with "[ \t]" + output.append(QLatin1String(" \\t")); + input += 2; + break; + + case L'n': + stillMultiLine = true; + // FALLTROUGH + Q_FALLTHROUGH(); + + default: + // copy "\?" unmodified + output.append(pattern.midRef(input, 2)); + input += 2; + } + break; + + case L']': + // copy "]" unmodified + insideClass = false; + output.append(pattern[input]); + ++input; + break; + + default: + // copy "?" 
unmodified + output.append(pattern[input]); + ++input; + } + } else { + // search for real dots and \s + switch (pattern[input].unicode()) { + case L'\\': + switch (pattern[input + 1].unicode()) { + case L'x': + if (input + 5 < inputLen) { + // copy "\x????" unmodified + output.append(pattern.midRef(input, 6)); + input += 6; + } else { + // copy "\x" unmodified + output.append(pattern.midRef(input, 2)); + input += 2; + } + stillMultiLine = true; + break; + + case L'0': + if (input + 4 < inputLen) { + // copy "\0???" unmodified + output.append(pattern.midRef(input, 5)); + input += 5; + } else { + // copy "\0" unmodified + output.append(pattern.midRef(input, 2)); + input += 2; + } + stillMultiLine = true; + break; + + case L's': + // replace "\s" with "[ \t]" + output.append(QLatin1String("[ \\t]")); + input += 2; + break; + + case L'n': + stillMultiLine = true; + // FALLTROUGH + Q_FALLTHROUGH(); + default: + // copy "\?" unmodified + output.append(pattern.midRef(input, 2)); + input += 2; + } + break; + + case L'[': + // copy "[" unmodified + insideClass = true; + output.append(pattern[input]); + ++input; + break; + + default: + // copy "?" 
unmodified + output.append(pattern[input]); + ++input; + } + } + } + return output; +} + // Kill our helpers again #ifdef FAST_DEBUG_ENABLE #undef FAST_DEBUG_ENABLE diff --git a/src/search/katesearchbar.h b/src/search/katesearchbar.h --- a/src/search/katesearchbar.h +++ b/src/search/katesearchbar.h @@ -194,7 +194,6 @@ void addCurrentTextToHistory(QComboBox *combo); void backupConfig(bool ofPower); void sendConfig(); - void fixForSingleLine(KTextEditor::Range &range, SearchDirection searchDirection); void showResultMessage(); void showSearchWrappedHint(SearchDirection searchDirection); diff --git a/src/search/katesearchbar.cpp b/src/search/katesearchbar.cpp --- a/src/search/katesearchbar.cpp +++ b/src/search/katesearchbar.cpp @@ -27,7 +27,6 @@ #include "katedocument.h" #include "kateglobal.h" #include "katematch.h" -#include "kateregexp.h" #include "katerenderer.h" #include "kateundomanager.h" #include "kateview.h" @@ -478,42 +477,6 @@ return isPower() ? m_powerUi->matchCase->isChecked() : m_incUi->matchCase->isChecked(); } -void KateSearchBar::fixForSingleLine(Range &range, SearchDirection searchDirection) -{ - FAST_DEBUG("Single-line workaround checking BEFORE" << range); - if (searchDirection == SearchForward) { - const int line = range.start().line(); - const int col = range.start().column(); - const int maxColWithNewline = m_view->document()->lineLength(line) + 1; - if (col == maxColWithNewline) { - FAST_DEBUG("Starting on a newline" << range); - const int maxLine = m_view->document()->lines() - 1; - if (line < maxLine) { - range.setRange(Cursor(line + 1, 0), range.end()); - FAST_DEBUG("Search range fixed to " << range); - } else { - FAST_DEBUG("Already at last line"); - range = Range::invalid(); - } - } - } else { - const int col = range.end().column(); - if (col == 0) { - FAST_DEBUG("Ending after a newline" << range); - const int line = range.end().line(); - if (line > 0) { - const int maxColWithNewline = m_view->document()->lineLength(line - 1); - 
range.setRange(range.start(), Cursor(line - 1, maxColWithNewline)); - FAST_DEBUG("Search range fixed to " << range); - } else { - FAST_DEBUG("Already at first line"); - range = Range::invalid(); - } - } - } - FAST_DEBUG("Single-line workaround checking AFTER" << range); -} - void KateSearchBar::onReturnPressed() { const Qt::KeyboardModifiers modifiers = QApplication::keyboardModifiers(); @@ -574,46 +537,50 @@ } FAST_DEBUG("Search range is" << inputRange); - { - const bool regexMode = enabledOptions.testFlag(Regex); - const bool multiLinePattern = regexMode ? KateRegExp(searchPattern()).isMultiLine() : false; - - // Single-line pattern workaround - if (regexMode && !multiLinePattern) { - fixForSingleLine(inputRange, searchDirection); - } - } - KateMatch match(m_view->doc(), enabledOptions); Range afterReplace = Range::invalid(); // Find, first try match.searchText(inputRange, searchPattern()); - if (match.isValid() && match.range() == selection) { - // Same match again - if (replacement != nullptr) { - // Selection is match -> replace - KTextEditor::MovingRange *smartInputRange = m_view->doc()->newMovingRange(inputRange, KTextEditor::MovingRange::ExpandLeft | KTextEditor::MovingRange::ExpandRight); - afterReplace = match.replace(*replacement, m_view->blockSelection()); - inputRange = *smartInputRange; - delete smartInputRange; - } + if (match.isValid()) { + if (match.range() == selection) { + // Same match again + if (replacement != nullptr) { + // Selection is match -> replace + KTextEditor::MovingRange *smartInputRange = m_view->doc()->newMovingRange(inputRange, KTextEditor::MovingRange::ExpandLeft | KTextEditor::MovingRange::ExpandRight); + afterReplace = match.replace(*replacement, m_view->blockSelection()); + inputRange = *smartInputRange; + delete smartInputRange; + } - if (!selectionOnly()) { - // Find, second try after old selection - if (searchDirection == SearchForward) { - const Cursor start = (replacement != nullptr) ? 
afterReplace.end() : selection.end(); - inputRange.setRange(start, inputRange.end()); - } else { - const Cursor end = (replacement != nullptr) ? afterReplace.start() : selection.start(); - inputRange.setRange(inputRange.start(), end); + if (!selectionOnly()) { + // Find, second try after old selection + if (searchDirection == SearchForward) { + const Cursor start = (replacement != nullptr) ? afterReplace.end() : selection.end(); + inputRange.setRange(start, inputRange.end()); + } else { + const Cursor end = (replacement != nullptr) ? afterReplace.start() : selection.start(); + inputRange.setRange(inputRange.start(), end); + } } - } - // Single-line pattern workaround - fixForSingleLine(inputRange, searchDirection); + match.searchText(inputRange, searchPattern()); - match.searchText(inputRange, searchPattern()); + } else if (match.isEmpty()) { + // valid, zero-length match, e.g.: '^', '$', '\b' + // advance the range to avoid looping + KTextEditor::DocumentCursor zeroLenMatch(m_view->doc(), match.range().end()); + + if (searchDirection == SearchForward) { + zeroLenMatch.move(1); + inputRange.setRange(zeroLenMatch.toCursor(), inputRange.end()); + } else { // SearchBackward + zeroLenMatch.move(-1); + inputRange.setRange(inputRange.start(), zeroLenMatch.toCursor()); + } + + match.searchText(inputRange, searchPattern()); + } } bool askWrap = !match.isValid() && (!selection.isValid() || !selectionOnly()); @@ -795,9 +762,6 @@ { const SearchOptions enabledOptions = searchOptions(SearchForward); - const bool regexMode = enabledOptions.testFlag(Regex); - const bool multiLinePattern = regexMode ? KateRegExp(searchPattern()).isMultiLine() : false; - // we highlight all ranges of a replace, up to some hard limit // e.g. if you replace 100000 things, rendering will break down otherwise ;=) const int maxHighlightings = 65536; @@ -861,10 +825,6 @@ // Can happen for regex patterns like "^". // If we don't advance here we will loop forever... 
workingStart.move(1); - } else if (regexMode && !multiLinePattern && workingStart.atEndOfLine()) { - // single-line regexps might match the naked line end - // therefore we better advance to the next line - workingStart.move(1); } m_workingRange->setRange(workingStart.toCursor(), m_workingRange->end()); @@ -1146,12 +1106,13 @@ addMenuManager.addEntry(QStringLiteral("^"), QString(), i18n("Beginning of line")); addMenuManager.addEntry(QStringLiteral("$"), QString(), i18n("End of line")); addMenuManager.addSeparator(); - addMenuManager.addEntry(QStringLiteral("."), QString(), i18n("Any single character (excluding line breaks)")); - addMenuManager.addSeparator(); + addMenuManager.addEntry(QStringLiteral("."), QString(), i18n("Match any character execluding new line (by default)")); addMenuManager.addEntry(QStringLiteral("+"), QString(), i18n("One or more occurrences")); addMenuManager.addEntry(QStringLiteral("*"), QString(), i18n("Zero or more occurrences")); addMenuManager.addEntry(QStringLiteral("?"), QString(), i18n("Zero or one occurrences")); addMenuManager.addEntry(QStringLiteral("{a"), QStringLiteral(",b}"), i18n(" through occurrences"), QStringLiteral("{"), QStringLiteral(",}")); + + addMenuManager.addSeparator(); addMenuManager.addSeparator(); addMenuManager.addEntry(QStringLiteral("("), QStringLiteral(")"), i18n("Group, capturing")); addMenuManager.addEntry(QStringLiteral("|"), QString(), i18n("Or")); @@ -1186,20 +1147,25 @@ addMenuManager.addEntry(QStringLiteral("\\d"), QString(), i18n("Digit")); addMenuManager.addEntry(QStringLiteral("\\D"), QString(), i18n("Non-digit")); addMenuManager.addEntry(QStringLiteral("\\s"), QString(), i18n("Whitespace (excluding line breaks)")); - addMenuManager.addEntry(QStringLiteral("\\S"), QString(), i18n("Non-whitespace (excluding line breaks)")); + addMenuManager.addEntry(QStringLiteral("\\S"), QString(), i18n("Non-whitespace")); addMenuManager.addEntry(QStringLiteral("\\w"), QString(), i18n("Word character (alphanumerics 
plus '_')")); addMenuManager.addEntry(QStringLiteral("\\W"), QString(), i18n("Non-word character")); } addMenuManager.addEntry(QStringLiteral("\\0???"), QString(), i18n("Octal character 000 to 377 (2^8-1)"), QStringLiteral("\\0")); - addMenuManager.addEntry(QStringLiteral("\\x????"), QString(), i18n("Hex character 0000 to FFFF (2^16-1)"), QStringLiteral("\\x")); + addMenuManager.addEntry(QStringLiteral("\\x{????}"), QString(), i18n("Hex character 0000 to FFFF (2^16-1)"), QStringLiteral("\\x{....}")); addMenuManager.addEntry(QStringLiteral("\\\\"), QString(), i18n("Backslash")); if (forPattern && regexMode) { addMenuManager.addSeparator(); addMenuManager.addEntry(QStringLiteral("(?:E"), QStringLiteral(")"), i18n("Group, non-capturing"), QStringLiteral("(?:")); - addMenuManager.addEntry(QStringLiteral("(?=E"), QStringLiteral(")"), i18n("Lookahead"), QStringLiteral("(?=")); + addMenuManager.addEntry(QStringLiteral("(?=E"), QStringLiteral(")"), i18n("Positive Lookahead"), QStringLiteral("(?=")); addMenuManager.addEntry(QStringLiteral("(?!E"), QStringLiteral(")"), i18n("Negative lookahead"), QStringLiteral("(?!")); + // variable length positive/negative lookbehind is an experimental feature in Perl 5.30 + // see: https://perldoc.perl.org/perlre.html + // currently QRegularExpression only supports fixed-length positive/negative lookbehind (2020-03-01) + addMenuManager.addEntry(QStringLiteral("(?<=E"), QStringLiteral(")"), i18n("Fixed-length positive lookbehind"), QStringLiteral("(?<=")); + addMenuManager.addEntry(QStringLiteral("(?(); } - return m_regExpSearch.search(m_findPattern, KTextEditor::Range(m_currentSearchPos, m_doc->documentEnd())); + QRegularExpression::PatternOptions options; + if (m_caseSensitive == Qt::CaseInsensitive) { + options |= (QRegularExpression::CaseInsensitiveOption); + } + return m_regExpSearch.search(m_findPattern, KTextEditor::Range(m_currentSearchPos, m_doc->documentEnd()), false /* search backwards */, options); } QString 
KateCommands::SedReplace::InteractiveSedReplacer::replacementTextForCurrentMatch()