Changeset View
Changeset View
Standalone View
Standalone View
src/lib/text/ktexttohtml.cpp
Show All 20 Lines | |||||
21 | #include "ktexttohtml.h" | 21 | #include "ktexttohtml.h" | ||
22 | #include "ktexttohtml_p.h" | 22 | #include "ktexttohtml_p.h" | ||
23 | #include "ktexttohtmlemoticonsinterface.h" | 23 | #include "ktexttohtmlemoticonsinterface.h" | ||
24 | 24 | | |||
25 | #include <QString> | 25 | #include <QString> | ||
26 | #include <QStringList> | 26 | #include <QStringList> | ||
27 | #include <QFile> | 27 | #include <QFile> | ||
28 | #include <QRegExp> | 28 | #include <QRegExp> | ||
29 | #include <QRegularExpression> | ||||
29 | #include <QPluginLoader> | 30 | #include <QPluginLoader> | ||
30 | #include <QVariant> | 31 | #include <QVariant> | ||
31 | #include <QCoreApplication> | 32 | #include <QCoreApplication> | ||
32 | 33 | | |||
33 | #include <limits.h> | 34 | #include <limits.h> | ||
34 | 35 | | |||
35 | #include "kcoreaddons_debug.h" | 36 | #include "kcoreaddons_debug.h" | ||
36 | 37 | | |||
▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Line(s) | 85 | if (mText[mPos] == QLatin1Char('@')) { | |||
138 | } | 139 | } | ||
139 | address = mText.mid(start, end - start); | 140 | address = mText.mid(start, end - start); | ||
140 | 141 | | |||
141 | mPos = end - 1; | 142 | mPos = end - 1; | ||
142 | } | 143 | } | ||
143 | return address; | 144 | return address; | ||
144 | } | 145 | } | ||
145 | 146 | | |||
147 | QString KTextToHTMLHelper::getPhoneNumber() | ||||
148 | { | ||||
149 | if (!mText[mPos].isDigit() && mText[mPos] != QLatin1Char('+')) { | ||||
150 | return {}; | ||||
151 | } | ||||
152 | | ||||
153 | static const QString allowedBeginSeparators = QStringLiteral(" \r\t\n:"); | ||||
154 | if (mPos > 0 && !allowedBeginSeparators.contains(mText[mPos - 1])) { | ||||
155 | return {}; | ||||
156 | } | ||||
157 | | ||||
158 | // this isn't 100% accurate, we filter stuff below that is too hard to capture with a regexp | ||||
159 | static const QRegularExpression telPattern(QStringLiteral(R"([+0](( |( ?[/-] ?)?)\(?\d+\)?+){6,30})")); | ||||
svuorela: How are the thread safety of the KTextToHTML functions?
And does that match the thread safety… | |||||
Valid point. This isn't specified in the documentation, things like static bool triedLoadPlugin in the existing code suggest that this isn't thread-safe right now though. vkrause: Valid point. This isn't specified in the documentation, things like `static bool… | |||||
160 | const auto match = telPattern.match(mText, mPos, QRegularExpression::NormalMatch, QRegularExpression::AnchoredMatchOption); | ||||
161 | if (match.hasMatch()) { | ||||
162 | auto m = match.captured(); | ||||
163 | // check for maximum number of digits (15), see https://en.wikipedia.org/wiki/Telephone_numbering_plan | ||||
164 | if (std::count_if(m.begin(), m.end(), [](const QChar &c) { return c.isDigit(); }) > 15) { | ||||
165 | return {}; | ||||
166 | } | ||||
167 | // only one / is allowed, otherwise we trigger on dates | ||||
168 | if (std::count(m.begin(), m.end(), QLatin1Char('/')) > 1) { | ||||
169 | return {}; | ||||
170 | } | ||||
171 | | ||||
172 | // parenthesis need to be balanced, and must not be nested | ||||
173 | int openIdx = -1; | ||||
174 | for (int i = 0; i < m.size(); ++i) { | ||||
175 | if ((m[i] == QLatin1Char('(') && openIdx >= 0) || (m[i] == QLatin1Char(')') && openIdx < 0)) { | ||||
176 | return {}; | ||||
177 | } | ||||
178 | if (m[i] == QLatin1Char('(')) { | ||||
179 | openIdx = i; | ||||
180 | } else if (m[i] == QLatin1Char(')')) { | ||||
181 | openIdx = -1; | ||||
182 | } | ||||
183 | } | ||||
184 | if (openIdx > 0) { | ||||
185 | m = m.left(openIdx - 1).trimmed(); | ||||
186 | } | ||||
187 | | ||||
188 | // check if there's a plausible separator at the end | ||||
189 | static const QString allowedEndSeparators = QStringLiteral(" \r\t\n,."); | ||||
190 | const auto l = m.size(); | ||||
191 | if (mText.size() > mPos + l && !allowedEndSeparators.contains(mText[mPos + l])) { | ||||
192 | return {}; | ||||
193 | } | ||||
194 | | ||||
195 | mPos += l - 1; | ||||
196 | return m; | ||||
197 | } | ||||
198 | return {}; | ||||
199 | } | ||||
200 | | ||||
201 | static QString normalizePhoneNumber(const QString &str) | ||||
202 | { | ||||
203 | QString res; | ||||
204 | res.reserve(str.size()); | ||||
205 | for (const auto c : str) { | ||||
206 | if (c.isDigit() || c == QLatin1Char('+')) { | ||||
207 | res.push_back(c); | ||||
208 | } | ||||
209 | } | ||||
210 | return res; | ||||
211 | } | ||||
212 | | ||||
146 | bool KTextToHTMLHelper::atUrl() const | 213 | bool KTextToHTMLHelper::atUrl() const | ||
147 | { | 214 | { | ||
148 | // the following characters are allowed in a dot-atom (RFC 2822): | 215 | // the following characters are allowed in a dot-atom (RFC 2822): | ||
149 | // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ | 216 | // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ | ||
150 | static const QString allowedSpecialChars = QStringLiteral(".!#$%&'*+-/=?^_`{|}~"); | 217 | static const QString allowedSpecialChars = QStringLiteral(".!#$%&'*+-/=?^_`{|}~"); | ||
151 | 218 | | |||
152 | // the character directly before the URL must not be a letter, a number or | 219 | // the character directly before the URL must not be a letter, a number or | ||
153 | // any other character allowed in a dot-atom (RFC 2822). | 220 | // any other character allowed in a dot-atom (RFC 2822). | ||
Show All 11 Lines | 231 | (ch == QLatin1Char('f') && (mText.midRef(mPos, 7) == QLatin1String("fish://") || | |||
165 | mText.midRef(mPos, 6) == QLatin1String("ftp://") || | 232 | mText.midRef(mPos, 6) == QLatin1String("ftp://") || | ||
166 | mText.midRef(mPos, 7) == QLatin1String("ftps://"))) || | 233 | mText.midRef(mPos, 7) == QLatin1String("ftps://"))) || | ||
167 | (ch == QLatin1Char('s') && (mText.midRef(mPos, 7) == QLatin1String("sftp://") || | 234 | (ch == QLatin1Char('s') && (mText.midRef(mPos, 7) == QLatin1String("sftp://") || | ||
168 | mText.midRef(mPos, 6) == QLatin1String("smb://"))) || | 235 | mText.midRef(mPos, 6) == QLatin1String("smb://"))) || | ||
169 | (ch == QLatin1Char('m') && mText.midRef(mPos, 7) == QLatin1String("mailto:")) || | 236 | (ch == QLatin1Char('m') && mText.midRef(mPos, 7) == QLatin1String("mailto:")) || | ||
170 | (ch == QLatin1Char('w') && mText.midRef(mPos, 4) == QLatin1String("www.")) || | 237 | (ch == QLatin1Char('w') && mText.midRef(mPos, 4) == QLatin1String("www.")) || | ||
171 | (ch == QLatin1Char('f') && (mText.midRef(mPos, 4) == QLatin1String("ftp.") || | 238 | (ch == QLatin1Char('f') && (mText.midRef(mPos, 4) == QLatin1String("ftp.") || | ||
172 | mText.midRef(mPos, 7) == QLatin1String("file://"))) || | 239 | mText.midRef(mPos, 7) == QLatin1String("file://"))) || | ||
173 | (ch == QLatin1Char('n') && mText.midRef(mPos, 5) == QLatin1String("news:")); | 240 | (ch == QLatin1Char('n') && mText.midRef(mPos, 5) == QLatin1String("news:")) || | ||
241 | (ch == QLatin1Char('t') && mText.midRef(mPos, 4) == QLatin1String("tel:")); | ||||
242 | | ||||
174 | } | 243 | } | ||
175 | 244 | | |||
176 | bool KTextToHTMLHelper::isEmptyUrl(const QString &url) const | 245 | bool KTextToHTMLHelper::isEmptyUrl(const QString &url) const | ||
177 | { | 246 | { | ||
178 | return url.isEmpty() || | 247 | return url.isEmpty() || | ||
179 | url == QLatin1String("http://") || | 248 | url == QLatin1String("http://") || | ||
180 | url == QLatin1String("https://") || | 249 | url == QLatin1String("https://") || | ||
181 | url == QLatin1String("fish://") || | 250 | url == QLatin1String("fish://") || | ||
182 | url == QLatin1String("ftp://") || | 251 | url == QLatin1String("ftp://") || | ||
183 | url == QLatin1String("ftps://") || | 252 | url == QLatin1String("ftps://") || | ||
184 | url == QLatin1String("sftp://") || | 253 | url == QLatin1String("sftp://") || | ||
185 | url == QLatin1String("smb://") || | 254 | url == QLatin1String("smb://") || | ||
186 | url == QLatin1String("vnc://") || | 255 | url == QLatin1String("vnc://") || | ||
187 | url == QLatin1String("mailto") || | 256 | url == QLatin1String("mailto") || | ||
188 | url == QLatin1String("www") || | 257 | url == QLatin1String("www") || | ||
189 | url == QLatin1String("ftp") || | 258 | url == QLatin1String("ftp") || | ||
190 | url == QLatin1String("news") || | 259 | url == QLatin1String("news") || | ||
191 | url == QLatin1String("news://"); | 260 | url == QLatin1String("news://") || | ||
261 | url == QLatin1String("tel") || | ||||
262 | url == QLatin1String("tel:"); | ||||
192 | } | 263 | } | ||
193 | 264 | | |||
194 | QString KTextToHTMLHelper::getUrl(bool *badurl) | 265 | QString KTextToHTMLHelper::getUrl(bool *badurl) | ||
195 | { | 266 | { | ||
196 | QString url; | 267 | QString url; | ||
197 | if (atUrl()) { | 268 | if (atUrl()) { | ||
198 | // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C | 269 | // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C | ||
199 | // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall | 270 | // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall | ||
▲ Show 20 Lines • Show All 294 Lines • ▼ Show 20 Line(s) | 558 | if (!str.isEmpty()) { | |||
494 | result.truncate(result.length() - | 565 | result.truncate(result.length() - | ||
495 | len - (localPart.count(QLatin1Char('&')) * 4)); | 566 | len - (localPart.count(QLatin1Char('&')) * 4)); | ||
496 | x -= len; | 567 | x -= len; | ||
497 | 568 | | |||
498 | result += QLatin1String("<a href=\"mailto:") + str + QLatin1String("\">") + str + QLatin1String("</a>"); | 569 | result += QLatin1String("<a href=\"mailto:") + str + QLatin1String("\">") + str + QLatin1String("</a>"); | ||
499 | x += str.length() - 1; | 570 | x += str.length() - 1; | ||
500 | continue; | 571 | continue; | ||
501 | } | 572 | } | ||
573 | if (flags & ConvertPhoneNumbers) { | ||||
574 | str = helper.getPhoneNumber(); | ||||
575 | if (!str.isEmpty()) { | ||||
576 | result += QLatin1String("<a href=\"tel:") + normalizePhoneNumber(str) + QLatin1String("\">") + str + QLatin1String("</a>"); | ||||
577 | x += str.length() - 1; | ||||
578 | continue; | ||||
579 | } | ||||
580 | } | ||||
502 | } | 581 | } | ||
503 | if (flags & HighlightText) { | 582 | if (flags & HighlightText) { | ||
504 | str = helper.highlightedText(); | 583 | str = helper.highlightedText(); | ||
505 | if (!str.isEmpty()) { | 584 | if (!str.isEmpty()) { | ||
506 | result += str; | 585 | result += str; | ||
507 | x += helper.mPos - start; | 586 | x += helper.mPos - start; | ||
508 | continue; | 587 | continue; | ||
509 | } | 588 | } | ||
Show All 18 Lines |
What is the thread safety of the KTextToHTML functions?
And does that match the thread safety of QRegularExpression and its const methods?