diff --git a/autotests/ktexttohtmltest.cpp b/autotests/ktexttohtmltest.cpp --- a/autotests/ktexttohtmltest.cpp +++ b/autotests/ktexttohtmltest.cpp @@ -439,6 +439,59 @@ << KTextToHTML::Options(KTextToHTML::PreserveSpaces) << "@@ -55,6 +55,10 @@ xsi:schemaLocation="http://www.kde.org/standards/kcfg/1.0 http://www.kde.org/"; + const auto opt = KTextToHTML::PreserveSpaces | KTextToHTML::ConvertPhoneNumbers; + // tel: urls, picked up by the generic URL detection + QTest::newRow("tel url compact") << "bla bla <tel:+491234567890> bla bla" << opt + << "bla bla &lt;<a href=\"tel:+491234567890\">tel:+491234567890</a>&gt; bla bla"; + QTest::newRow("tel url fancy") << "bla bla tel:+49-321-123456 bla bla" << opt + << "bla bla <a href=\"tel:+49-321-123456\">tel:+49-321-123456</a> bla bla"; + + // negative tel: url tests + QTest::newRow("empty tel url") << "bla tel: blub" << opt + << "bla tel: blub"; + + // phone numbers: linked text stays as written, the tel: target drops the separators + QTest::newRow("tel compact international") << "call +49123456789, then hang up" << opt + << "call <a href=\"tel:+49123456789\">+49123456789</a>, then hang up"; + QTest::newRow("tel parenthesis/spaces international") << "phone:+33 (01) 12 34 56 78 blub" << opt + << "phone:<a href=\"tel:+330112345678\">+33 (01) 12 34 56 78</a> blub"; + QTest::newRow("tel dashes international") << "bla +44-321-1-234-567" << opt + << "bla <a href=\"tel:+443211234567\">+44-321-1-234-567</a>"; + QTest::newRow("tel dashes/spaces international") << "+1 123-456-7000 blub" << opt + << "<a href=\"tel:+11234567000\">+1 123-456-7000</a> blub"; + QTest::newRow("tel spaces international") << "bla +32 1 234 5678 blub" << opt + << "bla <a href=\"tel:+3212345678\">+32 1 234 5678</a> blub"; + QTest::newRow("tel slash domestic") << "bla 030/12345678 blub" << opt + << "bla <a href=\"tel:03012345678\">030/12345678</a> blub"; + QTest::newRow("tel slash/space domestic") << "Tel.: 089 / 12 34 56 78" << opt + << "Tel.: <a href=\"tel:08912345678\">089 / 12 34 56 78</a>"; + QTest::newRow("tel follow by parenthesis") << "Telefon: 0 18 05 / 12 23 46 (14 Cent/Min.*)" << opt + << "Telefon: <a href=\"tel:01805122346\">0 18 05 / 12 23 46</a> (14 Cent/Min.*)"; + QTest::newRow("tel space single digit at end") << "0123/123 456 7" << opt + << "<a href=\"tel:01231234567\">0123/123 456 7</a>"; + QTest::newRow("tel space around dash") << "bla +49 (0) 12 23 - 45 6000 blub" << opt + << "bla <a href=\"tel:+4901223456000\">+49 (0) 12 23 - 45 6000</a> blub"; + 
QTest::newRow("tel two numbers separated by dash") << "bla +49 (0) 12 23 46 78 - +49 0123/123 456 78 blub" << opt + << "bla <a href=\"tel:+49012234678\">+49 (0) 12 23 46 78</a> - <a href=\"tel:+49012312345678\">+49 0123/123 456 78</a> blub"; + + // negative tests for phone numbers: the text must pass through unchanged + QTest::newRow("non-tel number") << "please send 1200 cakes" << opt + << "please send 1200 cakes"; + QTest::newRow("non-tel alpha-numeric") << "bla 1-123-456-ABCD blub" << opt + << "bla 1-123-456-ABCD blub"; + QTest::newRow("non-tel alpha prefix") << "ABCD0123-456-789" << opt + << "ABCD0123-456-789"; + QTest::newRow("non-tel date") << "bla 02/03/2019 blub" << opt + << "bla 02/03/2019 blub"; + QTest::newRow("non-tel too long") << "bla +012-4567890123456 blub" << opt + << "bla +012-4567890123456 blub"; + QTest::newRow("non-tel unbalanced") << "bla +012-456789(01 blub" << opt + << "bla +012-456789(01 blub"; + QTest::newRow("non-tel nested") << "bla +012-4(56(78)90)1 blub" << opt + << "bla +012-4(56(78)90)1 blub"; + QTest::newRow("tel extraction disabled") << "call +49123456789 now" + << KTextToHTML::Options(KTextToHTML::PreserveSpaces) + << "call +49123456789 now"; } diff --git a/src/lib/text/ktexttohtml.h b/src/lib/text/ktexttohtml.h --- a/src/lib/text/ktexttohtml.h +++ b/src/lib/text/ktexttohtml.h @@ -60,7 +60,13 @@ * Interpret text highlighting markup, like *bold*, _underline_ and /italic/, * and wrap them in corresponding HTML entities. */ - HighlightText = 1 << 4 + HighlightText = 1 << 4, + + /** + * Replace phone numbers with tel: links. 
+ * @since 5.56.0 + */ + ConvertPhoneNumbers = 1 << 5 }; Q_DECLARE_FLAGS(Options, Option) diff --git a/src/lib/text/ktexttohtml.cpp b/src/lib/text/ktexttohtml.cpp --- a/src/lib/text/ktexttohtml.cpp +++ b/src/lib/text/ktexttohtml.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -143,6 +144,77 @@ return address; } +// Tries to read a phone number starting at mPos. +// Returns the matched text and moves mPos to its last character, or returns +// an empty string and leaves mPos untouched if no plausible number starts here. +QString KTextToHTMLHelper::getPhoneNumber() +{ + if (!mText[mPos].isDigit() && mText[mPos] != QLatin1Char('+')) { + return {}; + } + + static const QString allowedBeginSeparators = QStringLiteral(" \r\t\n:"); + if (mPos > 0 && !allowedBeginSeparators.contains(mText[mPos - 1])) { + return {}; + } + + // this isn't 100% accurate, we filter stuff below that is too hard to capture with a regexp + static const QRegularExpression telPattern(QStringLiteral(R"([+0](( |( ?[/-] ?)?)\(?\d+\)?+){6,30})")); + const auto match = telPattern.match(mText, mPos, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption); + if (match.hasMatch()) { + auto m = match.captured(); + // check for maximum number of digits (15), see https://en.wikipedia.org/wiki/Telephone_numbering_plan + if (std::count_if(m.begin(), m.end(), [](const QChar &c) { return c.isDigit(); }) > 15) { + return {}; + } + // only one / is allowed, otherwise we trigger on dates + if (std::count(m.begin(), m.end(), QLatin1Char('/')) > 1) { + return {}; + } + + // parenthesis need to be balanced, and must not be nested + int openIdx = -1; + for (int i = 0; i < m.size(); ++i) { + if ((m[i] == QLatin1Char('(') && openIdx >= 0) || (m[i] == QLatin1Char(')') && openIdx < 0)) { + return {}; + } + if (m[i] == QLatin1Char('(')) { + openIdx = i; + } else if (m[i] == QLatin1Char(')')) { + openIdx = -1; + } + } + // a '(' left open belongs to text following the number, cut the match before it + if (openIdx > 0) { + m = m.left(openIdx - 1).trimmed(); + } + + // check if there's a plausible separator at the end + static const QString allowedEndSeparators = QStringLiteral(" \r\t\n,."); + const auto l = m.size(); + if (mText.size() > mPos + l && 
!allowedEndSeparators.contains(mText[mPos + l])) { + return {}; + } + + mPos += l - 1; + return m; + } + return {}; +} + +// Reduces str to its digits and plus signs, the form used as the target of tel: links. +static QString normalizePhoneNumber(const QString &str) +{ + QString res; + res.reserve(str.size()); + for (const auto c : str) { + if (c.isDigit() || c == QLatin1Char('+')) { + res.push_back(c); + } + } + return res; +} + bool KTextToHTMLHelper::atUrl() const { // the following characters are allowed in a dot-atom (RFC 2822): @@ -170,7 +242,8 @@ (ch == QLatin1Char('w') && mText.midRef(mPos, 4) == QLatin1String("www.")) || (ch == QLatin1Char('f') && (mText.midRef(mPos, 4) == QLatin1String("ftp.") || mText.midRef(mPos, 7) == QLatin1String("file://"))) || - (ch == QLatin1Char('n') && mText.midRef(mPos, 5) == QLatin1String("news:")); + (ch == QLatin1Char('n') && mText.midRef(mPos, 5) == QLatin1String("news:")) || + (ch == QLatin1Char('t') && mText.midRef(mPos, 4) == QLatin1String("tel:")); } bool KTextToHTMLHelper::isEmptyUrl(const QString &url) const @@ -188,7 +261,9 @@ url == QLatin1String("www") || url == QLatin1String("ftp") || url == QLatin1String("news") || - url == QLatin1String("news://"); + url == QLatin1String("news://") || + url == QLatin1String("tel") || + url == QLatin1String("tel:"); } QString KTextToHTMLHelper::getUrl(bool *badurl) @@ -499,6 +574,14 @@ x += str.length() - 1; continue; } + if (flags & ConvertPhoneNumbers) { + str = helper.getPhoneNumber(); + if (!str.isEmpty()) { + result += QLatin1String("<a href=\"tel:") + normalizePhoneNumber(str) + QLatin1String("\">") + str + QLatin1String("</a>"); + x += str.length() - 1; + continue; + } + } } if (flags & HighlightText) { str = helper.highlightedText(); diff --git a/src/lib/text/ktexttohtml_p.h b/src/lib/text/ktexttohtml_p.h --- a/src/lib/text/ktexttohtml_p.h +++ b/src/lib/text/ktexttohtml_p.h @@ -47,6 +47,7 @@ KTextToHTMLEmoticonsInterface *emoticonsInterface() const; QString getEmailAddress(); + QString getPhoneNumber(); bool atUrl() const; bool isEmptyUrl(const QString &url) const; QString getUrl(bool *badurl = nullptr);