diff --git a/src/lib/text/ktexttohtml.cpp b/src/lib/text/ktexttohtml.cpp
index 5a8cdc7..62fd16e 100644
--- a/src/lib/text/ktexttohtml.cpp
+++ b/src/lib/text/ktexttohtml.cpp
@@ -1,608 +1,608 @@
/*
Copyright (c) 2002 Dave Corrie
Copyright (c) 2014 Daniel Vrátil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "ktexttohtml.h"
#include "ktexttohtml_p.h"
#include "ktexttohtmlemoticonsinterface.h"

#include <QCoreApplication>
#include <QFile>
#include <QPluginLoader>
#include <QRegularExpression>
#include <QStringList>
#include <QVariant>

#include <algorithm>
#include <climits>

#include "kcoreaddons_debug.h"
static KTextToHTMLEmoticonsInterface *s_emoticonsInterface = nullptr;
static void loadEmoticonsPlugin()
{
static bool triedLoadPlugin = false;
if (!triedLoadPlugin) {
triedLoadPlugin = true;
// Check if QGuiApplication::platformName property exists. This is a
// hackish way of determining whether we are running QGuiApplication,
// because we cannot load the FrameworkIntegration plugin into a
// QCoreApplication, as it would crash immediately
if (qApp->metaObject()->indexOfProperty("platformName") > -1) {
QPluginLoader lib(QStringLiteral("kf5/KEmoticonsIntegrationPlugin"));
QObject *rootObj = lib.instance();
if (rootObj) {
s_emoticonsInterface = rootObj->property(KTEXTTOHTMLEMOTICONS_PROPERTY).value();
}
}
}
if (!s_emoticonsInterface) {
s_emoticonsInterface = new KTextToHTMLEmoticonsDummy();
}
}
/**
 * Creates a helper that scans @p plainText starting at index @p pos.
 *
 * @param plainText     text to scan
 * @param pos           initial scan position (stored in mPos)
 * @param maxUrlLen     upper bound on the length of a URL accepted by getUrl()
 * @param maxAddressLen upper bound on the length of an address accepted by getEmailAddress()
 */
KTextToHTMLHelper::KTextToHTMLHelper(const QString &plainText, int pos, int maxUrlLen, int maxAddressLen)
    : mText(plainText),
      mMaxUrlLen(maxUrlLen),
      mMaxAddressLen(maxAddressLen),
      mPos(pos)
{
    // Nothing else to do: all state lives in the initialiser list.
}
/**
 * Returns the process-wide emoticons backend, loading it on first use.
 */
KTextToHTMLEmoticonsInterface *KTextToHTMLHelper::emoticonsInterface() const
{
    const bool alreadyLoaded = (s_emoticonsInterface != nullptr);
    if (!alreadyLoaded) {
        // Installs either the plugin-provided interface or the dummy fallback.
        loadEmoticonsPlugin();
    }
    return s_emoticonsInterface;
}
/**
 * Tries to extract an email address around the '@' at the current position.
 *
 * @return the address, or an empty string if mText[mPos] is not '@' or the
 *         surrounding text does not look like an address. On success mPos is
 *         moved to the last character of the address; otherwise it is untouched.
 */
QString KTextToHTMLHelper::getEmailAddress()
{
    const QChar atSign = QLatin1Char('@');
    if (mText.at(mPos) != atSign) {
        return QString();
    }

    // the following characters are allowed in a dot-atom (RFC 2822):
    // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
    static const QString allowedSpecialChars = QStringLiteral(".!#$%&'*+-/=?^_`{|}~");

    // Walk backwards over the local part; only ASCII dot-atom characters qualify.
    int localBegin = mPos;
    for (int i = mPos - 1; i >= 0; --i) {
        const QChar c = mText.at(i);
        if (c.unicode() >= 128) {
            break;
        }
        if (c == atSign) {
            return QString(); // local part contains '@' -> no email address
        }
        if (!c.isLetterOrNumber() && allowedSpecialChars.indexOf(c) == -1) {
            break;
        }
        localBegin = i;
    }

    // we assume that an email address starts with a letter or a digit
    while (localBegin < mPos && !mText.at(localBegin).isLetterOrNumber()) {
        ++localBegin;
    }
    if (localBegin == mPos) {
        return QString(); // local part is empty -> no email address
    }

    // Walk forwards over the domain part, remembering the first dot.
    const int textLength = mText.length();
    int firstDot = INT_MAX;
    int domainEnd = mPos + 1;
    for (; domainEnd < textLength; ++domainEnd) {
        const QChar c = mText.at(domainEnd);
        if (c == atSign) {
            return QString(); // domain part contains '@' -> no email address
        }
        if (c == QLatin1Char('.')) {
            firstDot = qMin(firstDot, domainEnd);
        } else if (c != QLatin1Char('-') && !c.isLetterOrNumber()) {
            break;
        }
    }

    // we assume that an email address ends with a letter or a digit
    while (domainEnd > mPos && !mText.at(domainEnd - 1).isLetterOrNumber()) {
        --domainEnd;
    }
    if (domainEnd == mPos) {
        return QString(); // domain part is empty -> no email address
    }
    if (firstDot >= domainEnd) {
        return QString(); // domain part doesn't contain a dot
    }

    const int addressLength = domainEnd - localBegin;
    if (addressLength > mMaxAddressLen) {
        return QString(); // too long -> most likely no email address
    }

    // Leave the cursor on the last character of the address.
    mPos = domainEnd - 1;
    return mText.mid(localBegin, addressLength);
}
/**
 * Tries to extract a phone number starting at the current position.
 *
 * @return the matched number text, or an empty string if there is none.
 *         On success mPos is advanced to the last character of the number.
 */
QString KTextToHTMLHelper::getPhoneNumber()
{
    // A number has to start with a digit or '+'.
    const QChar lead = mText.at(mPos);
    if (lead != QLatin1Char('+') && !lead.isDigit()) {
        return {};
    }

    // ...and must be preceded by a separator unless it opens the text.
    static const QString allowedBeginSeparators = QStringLiteral(" \r\t\n:");
    if (mPos > 0 && !allowedBeginSeparators.contains(mText.at(mPos - 1))) {
        return {};
    }

    // this isn't 100% accurate, we filter stuff below that is too hard to capture with a regexp
    static const QRegularExpression telPattern(QStringLiteral(R"([+0](( |( ?[/-] ?)?)\(?\d+\)?+){6,30})"));
    const auto match = telPattern.match(mText, mPos, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption);
    if (!match.hasMatch()) {
        return {};
    }

    QString candidate = match.captured();

    // check for maximum number of digits (15), see https://en.wikipedia.org/wiki/Telephone_numbering_plan
    const auto digitCount = std::count_if(candidate.begin(), candidate.end(), [](const QChar &c) {
        return c.isDigit();
    });
    if (digitCount > 15) {
        return {};
    }

    // only one / is allowed, otherwise we trigger on dates
    const auto slashCount = std::count(candidate.begin(), candidate.end(), QLatin1Char('/'));
    if (slashCount > 1) {
        return {};
    }

    // parenthesis need to be balanced, and must not be nested
    int unclosedParen = -1; // index of a '(' that has not been closed yet, or -1
    for (int i = 0; i < candidate.size(); ++i) {
        const QChar c = candidate.at(i);
        if (c == QLatin1Char('(')) {
            if (unclosedParen >= 0) {
                return {}; // nested
            }
            unclosedParen = i;
        } else if (c == QLatin1Char(')')) {
            if (unclosedParen < 0) {
                return {}; // closing without opening
            }
            unclosedParen = -1;
        }
    }

    // A dangling '(' at the end: cut the candidate off before it.
    if (unclosedParen > 0) {
        candidate = candidate.left(unclosedParen - 1).trimmed();
    }

    // check if there's a plausible separator at the end
    static const QString allowedEndSeparators = QStringLiteral(" \r\t\n,.");
    const auto candidateLength = candidate.size();
    const bool hasFollowingChar = mText.size() > mPos + candidateLength;
    if (hasFollowingChar && !allowedEndSeparators.contains(mText.at(mPos + candidateLength))) {
        return {};
    }

    mPos += candidateLength - 1;
    return candidate;
}
/**
 * Returns @p str reduced to its digits and '+' signs, in their original order.
 */
static QString normalizePhoneNumber(const QString &str)
{
    const int inputLength = str.size();

    QString normalized;
    normalized.reserve(inputLength);
    for (int i = 0; i < inputLength; ++i) {
        const QChar c = str.at(i);
        const bool keep = (c == QLatin1Char('+')) || c.isDigit();
        if (keep) {
            normalized.append(c);
        }
    }
    return normalized;
}
bool KTextToHTMLHelper::atUrl() const
{
// the following characters are allowed in a dot-atom (RFC 2822):
// a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
static const QString allowedSpecialChars = QStringLiteral(".!#$%&'*+-/=?^_`{|}~");
// the character directly before the URL must not be a letter, a number or
// any other character allowed in a dot-atom (RFC 2822).
if ((mPos > 0) &&
(mText[mPos - 1].isLetterOrNumber() ||
(allowedSpecialChars.indexOf(mText[mPos - 1]) != -1))) {
return false;
}
QChar ch = mText[mPos];
return
(ch == QLatin1Char('h') && (mText.midRef(mPos, 7) == QLatin1String("http://") ||
mText.midRef(mPos, 8) == QLatin1String("https://"))) ||
(ch == QLatin1Char('v') && mText.midRef(mPos, 6) == QLatin1String("vnc://")) ||
(ch == QLatin1Char('f') && (mText.midRef(mPos, 7) == QLatin1String("fish://") ||
mText.midRef(mPos, 6) == QLatin1String("ftp://") ||
mText.midRef(mPos, 7) == QLatin1String("ftps://"))) ||
(ch == QLatin1Char('s') && (mText.midRef(mPos, 7) == QLatin1String("sftp://") ||
mText.midRef(mPos, 6) == QLatin1String("smb://"))) ||
(ch == QLatin1Char('m') && mText.midRef(mPos, 7) == QLatin1String("mailto:")) ||
(ch == QLatin1Char('w') && mText.midRef(mPos, 4) == QLatin1String("www.")) ||
(ch == QLatin1Char('f') && (mText.midRef(mPos, 4) == QLatin1String("ftp.") ||
mText.midRef(mPos, 7) == QLatin1String("file://"))) ||
(ch == QLatin1Char('n') && mText.midRef(mPos, 5) == QLatin1String("news:")) ||
(ch == QLatin1Char('t') && mText.midRef(mPos, 4) == QLatin1String("tel:"));
}
bool KTextToHTMLHelper::isEmptyUrl(const QString &url) const
{
return url.isEmpty() ||
url == QLatin1String("http://") ||
url == QLatin1String("https://") ||
url == QLatin1String("fish://") ||
url == QLatin1String("ftp://") ||
url == QLatin1String("ftps://") ||
url == QLatin1String("sftp://") ||
url == QLatin1String("smb://") ||
url == QLatin1String("vnc://") ||
url == QLatin1String("mailto") ||
url == QLatin1String("www") ||
url == QLatin1String("ftp") ||
url == QLatin1String("news") ||
url == QLatin1String("news://") ||
url == QLatin1String("tel") ||
url == QLatin1String("tel:");
}
/**
 * Tries to extract a URL starting at the current position.
 *
 * @param badurl optional out-parameter; set to true when the text is rejected
 *               because a '>' directly follows a '"' inside the candidate.
 *               It is never written otherwise, so callers must initialise it.
 * @return the URL text, or an empty string if atUrl() is false, the candidate
 *         is only a bare prefix, or it is longer than mMaxUrlLen.
 *
 * On success mPos points at the last character that belongs to the returned
 * URL. When the candidate is empty or too long, mPos is restored to where it
 * started. On the bad-URL path the function returns without restoring mPos.
 */
QString KTextToHTMLHelper::getUrl(bool *badurl)
{
    QString url;
    if (atUrl()) {
        // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
        // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
        // be allowed and should be ignored when the URI is extracted.

        // This implementation follows this recommendation and
        // allows the URL to be enclosed within different kind of brackets/quotes
        // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
        // the URL ends with the first whitespace
        // Also, if the URL is enclosed in brackets, the URL itself is not allowed
        // to contain the closing bracket, as this would be detected as the end of the URL

        // afterUrl stays null when the URL is not enclosed.
        QChar beforeUrl, afterUrl;

        // detect if the url has been surrounded by brackets or quotes
        if (mPos > 0) {
            beforeUrl = mText[mPos - 1];

            /*if ( beforeUrl == '(' ) {
                afterUrl = ')';
            } else */if (beforeUrl == QLatin1Char('[')) {
                afterUrl = QLatin1Char(']');
            } else if (beforeUrl == QLatin1Char('<')) {
                afterUrl = QLatin1Char('>');
            } else if (beforeUrl == QLatin1Char('>')) { // URL directly follows a '>' (e.g. it is the content of a markup element): it ends at the next '<'
                afterUrl = QLatin1Char('<');
            } else if (beforeUrl == QLatin1Char('"')) {
                afterUrl = QLatin1Char('"');
            }
        }
        url.reserve(mMaxUrlLen); // avoid allocs
        int start = mPos; // remembered so the cursor can be rewound on failure
        bool previousCharIsSpace = false;
        bool previousCharIsADoubleQuote = false;
        bool previousIsAnAnchor = false; // set once a '#' has been seen; never reset
        // Consume printable/space characters until the terminator: the first
        // whitespace for a bare URL, or the closing delimiter for an enclosed one.
        while ((mPos < mText.length()) &&
                (mText[mPos].isPrint() || mText[mPos].isSpace()) &&
                ((afterUrl.isNull() && !mText[mPos].isSpace()) ||
                 (!afterUrl.isNull() && mText[mPos] != afterUrl))) {
            if (!previousCharIsSpace && (mText[mPos] == QLatin1Char('<')) && ((mPos + 1) < mText.length())) {
                // Fix Bug #346132: a '<' that is immediately followed by another
                // URL ends the current one.
                // '<' inside a URL is not allowed in general, but a '<' that is not
                // followed by a URL is still accepted as part of the URL
                // (NOTE(review): the example text of the original comment was lost).
                // Therefore: check if what follows is another URL and if so, stop here
                mPos++;
                if (atUrl()) {
                    mPos--;
                    break;
                }
                mPos--;
            }
            if (!previousCharIsSpace && (mText[mPos] == QLatin1Char(' ')) && ((mPos + 1) < mText.length())) {
                // Fix kmail bug: allow "http://www.foo.bar http://foo.bar/"
                // Therefore: check if what follows is another URL and if so, stop here
                mPos++;
                if (atUrl()) {
                    mPos--;
                    break;
                }
                mPos--;
            }
            if (mText[mPos].isSpace()) {
                // Whitespace is only reachable for enclosed URLs; it is dropped.
                previousCharIsSpace = true;
            } else if (!previousIsAnAnchor && mText[mPos] == QLatin1Char('[')) {
                // Square brackets end the URL unless a '#' was seen earlier.
                break;
            } else if (!previousIsAnAnchor && mText[mPos] == QLatin1Char(']')) {
                break;
            } else { // skip whitespace
                if (previousCharIsSpace && mText[mPos] == QLatin1Char('<')) {
                    // '<' after whitespace ends the URL; a single space is appended first.
                    url.append(QLatin1Char(' '));
                    break;
                }
                previousCharIsSpace = false;
                if (mText[mPos] == QLatin1Char('>') && previousCharIsADoubleQuote) {
                    //it's an invalid url
                    if (badurl) {
                        *badurl = true;
                    }
                    return QString();
                }
                if (mText[mPos] == QLatin1Char('"')) {
                    previousCharIsADoubleQuote = true;
                } else {
                    previousCharIsADoubleQuote = false;
                }
                if (mText[mPos] == QLatin1Char('#')) {
                    previousIsAnAnchor = true;
                }
                url.append(mText[mPos]);
                // Stop as soon as the limit is exceeded; the check below rejects the URL.
                if (url.length() > mMaxUrlLen) {
                    break;
                }
            }
            ++mPos;
        }

        if (isEmptyUrl(url) || (url.length() > mMaxUrlLen)) {
            // Nothing usable: rewind and report "no URL".
            mPos = start;
            url.clear();
            return url;
        } else {
            // Step back so mPos rests on the last consumed character.
            --mPos;
        }
    }

    // HACK: This is actually against the RFC. However, most people don't properly escape the URL in
    //       their text with "" or <>. That leads to people writing an url, followed immediately by
    //       a dot to finish the sentence. That would lead the parser to include the dot in the url,
    //       even though that is not wanted. So work around that here.
    //       Most real-life URLs hopefully don't end with dots or commas.
    static const QString wordBoundaries = QStringLiteral(".,:!?)>");
    if (url.length() > 1) {
        // Strip trailing punctuation, keeping at least one character, and move
        // the cursor back by one for every character removed.
        do {
            if (wordBoundaries.contains(url.at(url.length() - 1))) {
                url.chop(1);
                --mPos;
            } else {
                break;
            }
        } while (url.length() > 1);
    }
    return url;
}
/**
 * Tries to turn a text-markup span (*bold*, _underline_, /italic/, -strike-)
 * starting at the current position into HTML.
 *
 * The opening symbol must be at the start of the text or follow whitespace,
 * and the closing symbol must be at the end of the text or be followed by
 * whitespace. The symbols themselves are kept inside the generated markup.
 *
 * @return the HTML fragment, or an empty string if there is no such span.
 *         On success mPos is advanced to the closing symbol.
 */
QString KTextToHTMLHelper::highlightedText()
{
    // formatting symbols must be preceded by a whitespace
    if ((mPos > 0) && !mText[mPos - 1].isSpace()) {
        return QString();
    }

    const QChar ch = mText[mPos];
    if (ch != QLatin1Char('/') && ch != QLatin1Char('*') && ch != QLatin1Char('_') && ch != QLatin1Char('-')) {
        return QString();
    }

    // <symbol><non-space ... non-space><symbol>; the symbol is backslash-escaped
    // because '*' and '/' etc. may be special in a pattern. %1 is used for both
    // occurrences so a single arg() call fills them.
    QRegularExpression re(QStringLiteral("\\%1([^\\s].*[^\\s])\\%1").arg(ch));
    // Inverted greediness makes ".*" stop at the first possible closing symbol.
    re.setPatternOptions(QRegularExpression::InvertedGreedinessOption);
    const auto match = re.match(mText, mPos, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption);
    if (!match.hasMatch() || match.capturedStart() != mPos) {
        return QString();
    }

    const int length = match.capturedLength();
    // there must be a whitespace after the closing formatting symbol
    if (mPos + length < mText.length() && !mText[mPos + length].isSpace()) {
        return QString();
    }
    mPos += length - 1;

    switch (ch.toLatin1()) {
    case '*':
        return QLatin1String("<b>*") + match.capturedRef(1) + QLatin1String("*</b>");
    case '_':
        return QLatin1String("<u>_") + match.capturedRef(1) + QLatin1String("_</u>");
    case '/':
        return QLatin1String("<i>/") + match.capturedRef(1) + QLatin1String("/</i>");
    case '-':
        return QLatin1String("<strike>-") + match.capturedRef(1) + QLatin1String("-</strike>");
    }
    return QString();
}
/**
 * Reads the file at @p iconPath and returns its contents as a
 * "data:image/png;base64,..." URL.
 *
 * @param iconPath path of the image file
 * @return the data URL, or an empty string if the path is empty or the file
 *         cannot be opened for reading.
 */
QString KTextToHTMLHelper::pngToDataUrl(const QString &iconPath) const
{
    if (iconPath.isEmpty()) {
        return QString();
    }

    QFile pngFile(iconPath);
    const bool opened = pngFile.open(QIODevice::ReadOnly | QIODevice::Unbuffered);
    if (!opened) {
        return QString();
    }

    const QByteArray rawBytes = pngFile.readAll();
    pngFile.close();

    const QByteArray encoded = rawBytes.toBase64();
    return QStringLiteral("data:image/png;base64,%1").arg(QLatin1String(encoded.constData()));
}
/**
 * Converts plain text to HTML.
 *
 * HTML metacharacters are escaped, line breaks become "<br />" (the '\n' is
 * kept), and depending on @p flags spaces/tabs are preserved, URLs, email
 * addresses and phone numbers become links, text markup is highlighted and
 * smileys are replaced.
 *
 * @param plainText     the text to convert
 * @param flags         combination of KTextToHTML options
 * @param maxUrlLen     longest URL that is still turned into a link
 * @param maxAddressLen longest email address that is still turned into a link
 * @return the HTML text. If a malformed URL is encountered, the whole input
 *         is returned with only the HTML metacharacters escaped.
 */
QString KTextToHTML::convertToHtml(const QString &plainText, const KTextToHTML::Options &flags, int maxUrlLen, int maxAddressLen)
{
    // The helper constructor takes (text, pos, maxUrlLen, maxAddressLen): the
    // start position has to be passed explicitly, otherwise the two limits
    // are shifted into the wrong parameters.
    KTextToHTMLHelper helper(plainText, 0, maxUrlLen, maxAddressLen);

    QString str;
    QString result;
    if (!helper.mText.isEmpty()) {
        // Escaping and markup make the output longer than the input.
        result.reserve(helper.mText.length() * 2);
    }

    int x; // column within the current line, used for tab expansion
    bool startOfLine = true;

    for (helper.mPos = 0, x = 0; helper.mPos < helper.mText.length(); ++helper.mPos, ++x) {
        const QChar ch = helper.mText.at(helper.mPos);

        if (flags & PreserveSpaces) {
            if (ch == QLatin1Char(' ')) {
                if (helper.mPos + 1 < helper.mText.length()) {
                    if (helper.mText.at(helper.mPos + 1) != QLatin1Char(' ')) {
                        // A single space, make it breaking if not at the start or end of the line
                        const bool endOfLine = helper.mText.at(helper.mPos + 1) == QLatin1Char('\n');
                        if (!startOfLine && !endOfLine) {
                            result += QLatin1Char(' ');
                        } else {
                            result += QLatin1String("&nbsp;");
                        }
                    } else {
                        // Whitespace of more than one space, make it all non-breaking
                        while (helper.mPos < helper.mText.length() && helper.mText.at(helper.mPos) == QLatin1Char(' ')) {
                            result += QLatin1String("&nbsp;");
                            ++helper.mPos;
                            ++x;
                        }

                        // We incremented once too often, undo that
                        --helper.mPos;
                        --x;
                    }
                } else {
                    // Last space in the text, it is non-breaking
                    result += QLatin1String("&nbsp;");
                }

                startOfLine = false;
                continue;
            } else if (ch == QLatin1Char('\t')) {
                // Expand the tab to the next multiple of 8 columns.
                do {
                    result += QLatin1String("&nbsp;");
                    ++x;
                } while ((x & 7) != 0);
                --x;
                startOfLine = false;
                continue;
            }
        }

        if (ch == QLatin1Char('\n')) {
            result += QLatin1String("<br />\n"); // Keep the \n, so apps can figure out the quoting levels correctly.
            startOfLine = true;
            x = -1; // the loop increment brings the column back to 0
            continue;
        }

        startOfLine = false;
        if (ch == QLatin1Char('&')) {
            result += QLatin1String("&amp;");
        } else if (ch == QLatin1Char('"')) {
            result += QLatin1String("&quot;");
        } else if (ch == QLatin1Char('<')) {
            result += QLatin1String("&lt;");
        } else if (ch == QLatin1Char('>')) {
            result += QLatin1String("&gt;");
        } else {
            const int start = helper.mPos;
            if (!(flags & IgnoreUrls)) {
                bool badUrl = false;
                str = helper.getUrl(&badUrl);
                if (badUrl) {
                    // Give up on any conversion: return the complete input with
                    // just '&', '"', '<' and '>' replaced by entities.
                    return helper.mText.toHtmlEscaped();
                }
                if (!str.isEmpty()) {
                    QString hyperlink;
                    if (str.left(4) == QLatin1String("www.")) {
                        hyperlink = QLatin1String("http://") + str;
                    } else if (str.left(4) == QLatin1String("ftp.")) {
                        hyperlink = QLatin1String("ftp://") + str;
                    } else {
                        hyperlink = str;
                    }

                    // NOTE(review): the href value is inserted unescaped while the
                    // link text is escaped; a '"' inside the URL would end the
                    // attribute early. Confirm whether this needs escaping too.
                    result += QLatin1String("<a href=\"") + hyperlink + QLatin1String("\">") + str.toHtmlEscaped() + QLatin1String("</a>");
                    x += helper.mPos - start;
                    continue;
                }

                str = helper.getEmailAddress();
                if (!str.isEmpty()) {
                    // len is the length of the local part
                    const int len = str.indexOf(QLatin1Char('@'));
                    const QString localPart = str.left(len);

                    // remove the local part from the result (as '&'s have been expanded to
                    // &amp; we have to take care of the 4 additional characters per '&')
                    result.truncate(result.length() -
                                    len - (localPart.count(QLatin1Char('&')) * 4));
                    x -= len;

                    result += QLatin1String("<a href=\"mailto:") + str + QLatin1String("\">") + str + QLatin1String("</a>");
                    x += str.length() - 1;
                    continue;
                }

                if (flags & ConvertPhoneNumbers) {
                    str = helper.getPhoneNumber();
                    if (!str.isEmpty()) {
                        // The link target only keeps digits and '+'; the visible text is unchanged.
                        result += QLatin1String("<a href=\"tel:") + normalizePhoneNumber(str) + QLatin1String("\">") + str + QLatin1String("</a>");
                        x += str.length() - 1;
                        continue;
                    }
                }
            }

            if (flags & HighlightText) {
                str = helper.highlightedText();
                if (!str.isEmpty()) {
                    result += str;
                    x += helper.mPos - start;
                    continue;
                }
            }
            result += ch;
        }
    }

    if (flags & ReplaceSmileys) {
        // Character sequences that must not be treated as smileys. The text is
        // already HTML-escaped here, so '>' appears as "&gt;".
        const QStringList exclude = { QStringLiteral("(c)"), QStringLiteral("(C)"), QStringLiteral("&gt;:-("), QStringLiteral("&gt;:("), QStringLiteral("(B)"), QStringLiteral("(b)"), QStringLiteral("(P)"), QStringLiteral("(p)")
                                      , QStringLiteral("(O)"), QStringLiteral("(o)"), QStringLiteral("(D)"), QStringLiteral("(d)"), QStringLiteral("(E)"), QStringLiteral("(e)"), QStringLiteral("(K)"), QStringLiteral("(k)")
                                      , QStringLiteral("(I)"), QStringLiteral("(i)"), QStringLiteral("(L)"), QStringLiteral("(l)"), QStringLiteral("(8)"), QStringLiteral("(T)"), QStringLiteral("(t)"), QStringLiteral("(G)")
                                      , QStringLiteral("(g)"), QStringLiteral("(F)"), QStringLiteral("(f)"), QStringLiteral("(H)")
                                      , QStringLiteral("8)"), QStringLiteral("(N)"), QStringLiteral("(n)"), QStringLiteral("(Y)"), QStringLiteral("(y)"), QStringLiteral("(U)"), QStringLiteral("(u)"), QStringLiteral("(W)"), QStringLiteral("(w)")
                                      , QStringLiteral("(6)")};

        result = helper.emoticonsInterface()->parseEmoticons(result, true, exclude);
    }

    return result;
}