diff --git a/webenginepart/src/webengineparthtmlembedder.cpp b/webenginepart/src/webengineparthtmlembedder.cpp
index b93b1f0ab..739c918fe 100644
--- a/webenginepart/src/webengineparthtmlembedder.cpp
+++ b/webenginepart/src/webengineparthtmlembedder.cpp
@@ -1,120 +1,124 @@
 /*
  * This file is part of the KDE project
  * Copyright (C) 2018 Stefano Crocco
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of
  * the License or (at your option) version 3 or any later version
  * accepted by the membership of KDE e.V. (or its successor approved
  * by the membership of KDE e.V.), which shall act as a proxy
  * defined in Section 14 of version 3 of the license.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB. If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #include "webengineparthtmlembedder.h"

 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include

 static const char s_extractUrlsJs[] = "extractUrlsForTag = function(name, attr){\n"
 " var elems = document.getElementsByTagName(name);\n"
 " var urls = [];\n"
 " for(var i = 0; i < elems.length; i++){\n"
 " var url = elems[i].getAttribute(attr);\n"
-" if(url.length > 0) urls.push(url);\n"
+" if(url && url.length > 0) urls.push(url);\n"
 " }\n"
 " return urls;\n"
 "};\n"
 "extractUrlsForTag(\"link\", \"href\").concat(extractUrlsForTag(\"img\", \"src\"));";

 static const char s_replaceUrlsJs[] = "urlMap = %1;\n"
 "replaceUrlsForTag = function(name, attr){\n"
 " var elems = document.getElementsByTagName(name);\n"
 " var urls = [];\n"
 " for(var i = 0; i < elems.length; i++){\n"
 " var url = elems[i].getAttribute(attr);\n"
 " var repl = urlMap[url];\n"
 " if(repl) elems[i].setAttribute(attr, repl);\n"
 " }\n"
 "}\n"
 "replaceUrlsForTag(\"link\", \"href\");\n"
-"replaceUrlsForTag(\"map\", \"src\");";
+"replaceUrlsForTag(\"img\", \"src\");";

 WebEnginePartHtmlEmbedder::WebEnginePartHtmlEmbedder(QObject* parent) : QObject(parent),
     m_profile(new QWebEngineProfile(this)),
     m_page(new QWebEnginePage(m_profile, this))
 {
-    connect(m_page, &QWebEnginePage::loadFinished, this, &WebEnginePartHtmlEmbedder::startExtractingUrls);
     connect(this, &WebEnginePartHtmlEmbedder::urlsExtracted, this, &WebEnginePartHtmlEmbedder::startReplacingUrls);
     connect(this, &WebEnginePartHtmlEmbedder::urlsReplaced, this, &WebEnginePartHtmlEmbedder::startRetrievingHtml);
 }

 void WebEnginePartHtmlEmbedder::startEmbedding(const QByteArray& html, const QString& mimeType)
 {
+    //Try avoiding problems with redirection (see documentation for this class)
+    connect(m_page, &QWebEnginePage::loadFinished, this, &WebEnginePartHtmlEmbedder::startExtractingUrls);
     m_page->setContent(html, mimeType, QUrl::fromLocalFile("/"));
 }

 void WebEnginePartHtmlEmbedder::startExtractingUrls()
 {
+    //Try avoiding problems with redirection (see documentation for this class)
+    disconnect(m_page, &QWebEnginePage::loadFinished, this, &WebEnginePartHtmlEmbedder::startExtractingUrls);
     auto lambda = [this](const QVariant &res){emit urlsExtracted(res.toStringList());};
     m_page->runJavaScript(s_extractUrlsJs, lambda);
 }

 void WebEnginePartHtmlEmbedder::startReplacingUrls(const QStringList& urls)
 {
     QStringList uniqueUrls(urls);
     uniqueUrls.removeDuplicates();
     QHash<QString, QVariant> map;
     foreach(const QString &url, uniqueUrls){
         QUrl u(url);
         QString data = dataUrl(u);
         if (!data.isEmpty()) {
             map[url] = QVariant(data);
         }
     }
     QJsonDocument doc = QJsonDocument::fromVariant(QVariant::fromValue(map));
     QString js = QString(s_replaceUrlsJs).arg(QString(doc.toJson()));
     m_page->runJavaScript(js, [this](const QVariant &){emit urlsReplaced();});
 }

 void WebEnginePartHtmlEmbedder::startRetrievingHtml()
 {
-    m_page->toHtml([this](const QString &html){emit finished(html);});
+    auto callback = [this](const QString &html){emit finished(html);};
+    m_page->toHtml(callback);
 }

 QString WebEnginePartHtmlEmbedder::dataUrl(const QUrl& url) const
 {
     if (url.scheme() != "file") {
         return QString();
     }
     QString path = url.toLocalFile();
     if (QFileInfo(path).isRelative()) {
         return QString();
     }
     QFile file(path);
     if (!file.open(QIODevice::ReadOnly)) {
         return QString();
     }
     QByteArray content = file.readAll().toBase64();
     return "data:" + QMimeDatabase().mimeTypeForFile(path).name()+";charset=UTF-8;base64," + content;
 }
diff --git a/webenginepart/src/webengineparthtmlembedder.h b/webenginepart/src/webengineparthtmlembedder.h
index 3a3ac15c7..a607d08d6 100644
--- a/webenginepart/src/webengineparthtmlembedder.h
+++ b/webenginepart/src/webengineparthtmlembedder.h
@@ -1,176 +1,182 @@
 /*
  * This file is part of the KDE project
  * Copyright (C) 2018 Stefano Crocco
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of
  * the License or (at your option) version 3 or any later version
  * accepted by
the membership of KDE e.V. (or its successor approved
  * by the membership of KDE e.V.), which shall act as a proxy
  * defined in Section 14 of version 3 of the license.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB. If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #ifndef WEBENGINEPARTHTMLEMBEDDER_H
 #define WEBENGINEPARTHTMLEMBEDDER_H

 #include "utils.h"

 #include <QObject>

 class QWebEnginePage;
 class QWebEngineProfile;

 /**
  * @brief Class which embeds content from local files referenced in (X)HTML code inside the
  * page itself using `data` URLs
  *
  * This class works asynchronously: startEmbedding() starts the embedding process and the finished()
  * signal is emitted when the embedding has finished with the resulting (X)HTML code as argument.
  *
  * @internal
  *
  * This class parses the HTML generated by KIO using `QWebEnginePage`, then replaces URLs pointing
  * to local files with their content embedded as `data` URLs. Currently, this is only done for
  * `link` (stylesheet) and `img` elements.
  *
- * The main complications with this approach is that the to query the `QWebEnginePage` for
+ * The main complication with this approach is that, to query the `QWebEnginePage` for
  * the HTML elements it contains one must use javascript and that the related functions
  * are all asynchronous. For this reason, we must rely on signals to tell when each step is done:
  *
- * `QWebEnginePage::setHtml` -> `QWebEnginePage::loadFinished` -> startExtractingUrls() -> urlsExtracted() ->
- * startReplacingUrls() -> urlsReplaced() -> startRetrievingHtml() -> htmlRetrieved() -> sendReply()
+ * `QWebEnginePage::setContent` -> `QWebEnginePage::loadFinished` -> startExtractingUrls() -> urlsExtracted() ->
+ * startReplacingUrls() -> urlsReplaced() -> startRetrievingHtml() -> finished()
+ *
+ * A problem can arise if the (X)HTML code contains a `<meta>` element with attribute `http-equiv="refresh"`.
+ * Since QWebEnginePage is not just a parser, it honours this refresh request and, after emitting the `loadFinished`
+ * signal, it refreshes the page, which means that `loadFinished` is emitted again and again. To avoid
+ * doing the embedding every time, the `loadFinished` signal is disconnected after the first time it's
+ * emitted and the connection is remade the next time startEmbedding() is called.
  *
- * @note This class uses and internal `QWebEnginePage` and an associated, off the record, `QWebEngineProfile`
- * so as not interfere with the rest of the part
+ * @note This class uses an internal `QWebEnginePage` and an associated, off the record, `QWebEngineProfile`
+ * so as not to interfere with the rest of the part
  */
 class WebEnginePartHtmlEmbedder : public QObject
 {
     Q_OBJECT

 public:
     /**
      * Constructor
      *
      * @param parent the parent object
      */
     WebEnginePartHtmlEmbedder(QObject* parent = Q_NULLPTR);

     /**
      * @brief Destructor
      */
     ~WebEnginePartHtmlEmbedder(){}

     /**
      * @brief Starts the embedding process asynchronously
      *
      * The finished() signal is emitted when the HTML with embedded URLs is ready
      *
      * @param html: the HTML code
      * @param mimeType: the mime type (to distinguish between HTML and XHTML)
      */
     void startEmbedding(const QByteArray &html, const QString &mimeType);

 signals:

     /**
      * @brief Signal emitted when the javascript code which extracts `link` and `img` URLs finishes running
      *
      * @param urls all the urls in `img` and `link` elements in the HTML code. The list contains URLs with any scheme and may contain duplicates
      */
     void urlsExtracted(const QStringList &urls);

     /**
      * @brief Signal emitted when the javascript code which replaces `file` URLs with `data` URLS in the HTML code finishes
      */
     void urlsReplaced();

     /**
      * @brief Signal emitted when the HTML code with the embedded data is ready
      *
      * Users of this class should connect to this signal to retrieve the HTML code.
      *
      * @param html the HTML code of the page
      */
     void finished(const QString &html);

 private slots:

     /**
      * @brief Extracts the Urls for `link` and `img` elements
      *
      * This function is asynchronous, as it uses `QWebEnginePage::runJavascript`. urlsExtracted() will be emitted when the javascript code has run.
      *
      * This function is called in response to the `loadFinished` signal of the internal `QWebEnginePage`
      */
     void startExtractingUrls();

     /**
      * @brief Replaces the `file` URLs in the list with `data` URLs
      *
      * This function is asynchronous, as it uses `QWebEnginePage::runJavascript`. urlsReplaced() will be emitted when the javascript code has run.
      *
      * This function is called in response to the urlsExtracted() signal
      *
      * @param urls the URLs to replace. Only the appropriate ones (see dataUrl()) will be replaced.
      */
     void startReplacingUrls(const QStringList &urls);

     /**
      * @brief Calls the `QWebEnginePage::toHtml` on the internal page
      *
-     * This function is asynchronous. htmlRetrieved() will be emitted when the HTML is ready
+     * This function is asynchronous. finished() will be emitted when the HTML is ready
      *
      * This function is called in response to the urlsReplaced() signal.
      */
     void startRetrievingHtml();

 private:

     /**
-     * @brief Converts an url to a `data` URL embeddding its contents
+     * @brief Converts an url to a `data` URL embedding its contents
      *
      * The conversion is performed only for URLs satisfying the following conditions:
      * * its scheme is `file`
      * * its filename is absolute
      * * the corresponding file can be opened in read mode.
      *
      * If one of these conditions is not satisfied, an empty URL is returned
      *
      * @param url the url to convert
      * @return the converted URL or an empty string if the URL can't be converted according to the rules above
      */
     QString dataUrl(const QUrl &url) const;

     /**
      * @brief The `QWebEngineProfile` used by the page which parses the HTML code
      *
      * @internal
      * The default profile can't be used because of possible interferences (for example, it can cause problems if
      * this class is used by an URL scheme handler whose output is HTML containing its own URL scheme; this is the
      * case of the `help` scheme, for example)
      */
     QWebEngineProfile *m_profile;

     /**
      * @brief The `QWebEnginePage` used to parse the HTML code produced by kioslaves
      *
      */
     QWebEnginePage *m_page;
 };

 #endif // WEBENGINEPARTHTMLEMBEDDER_H
diff --git a/webenginepart/src/webenginepartkiohandler.cpp b/webenginepart/src/webenginepartkiohandler.cpp
index c17607f9d..10b215312 100644
--- a/webenginepart/src/webenginepartkiohandler.cpp
+++ b/webenginepart/src/webenginepartkiohandler.cpp
@@ -1,114 +1,113 @@
 /*
  * This file is part of the KDE project
  * Copyright (C) 2018 Stefano Crocco
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of
  * the License or (at your option) version 3 or any later version
  * accepted by the membership of KDE e.V. (or its successor approved
  * by the membership of KDE e.V.), which shall act as a proxy
  * defined in Section 14 of version 3 of the license.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.
If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #include "webenginepartkiohandler.h"
 #include "webengineparthtmlembedder.h"

 #include
 #include
 #include
 #include
-#include
 #include

 void WebEnginePartKIOHandler::requestStarted(QWebEngineUrlRequestJob *req)
 {
     m_queuedRequests << RequestJobPointer(req);
     processNextRequest();
 }

 WebEnginePartKIOHandler::WebEnginePartKIOHandler(QObject* parent): QWebEngineUrlSchemeHandler(parent), m_embedder(Q_NULLPTR)
 {
     connect(this, &WebEnginePartKIOHandler::ready, this, &WebEnginePartKIOHandler::sendReply);
 }

 void WebEnginePartKIOHandler::sendReply()
 {
     if (m_currentRequest) {
         if (isSuccessful()) {
             QBuffer *buf = new QBuffer;
             buf->open(QBuffer::ReadWrite);
             buf->write(m_data);
             buf->seek(0);
             connect(buf, &QIODevice::aboutToClose, buf, &QObject::deleteLater);
             m_currentRequest->reply(m_mimeType.name().toUtf8(), buf);
         } else {
             m_currentRequest->fail(QWebEngineUrlRequestJob::UrlInvalid);
         }
         m_currentRequest.clear();
     }
     processNextRequest();
 }

 void WebEnginePartKIOHandler::processNextRequest()
 {
     if (m_currentRequest) {
         return;
     }
     while (!m_currentRequest && !m_queuedRequests.isEmpty()) {
         m_currentRequest = m_queuedRequests.takeFirst();
     }
     if (!m_currentRequest) {
         return;
     }
     KIO::StoredTransferJob *job = KIO::storedGet(m_currentRequest ->requestUrl(), KIO::NoReload, KIO::HideProgressInfo);
     connect(job, &KIO::StoredTransferJob::result, this, [this, job](){kioJobFinished(job);});
 }

 void WebEnginePartKIOHandler::embedderFinished(const QString& html)
 {
     m_data = html.toUtf8();
     emit ready();
 }

 void WebEnginePartKIOHandler::processSlaveOutput()
 {
     if (m_mimeType.inherits("text/html") || m_mimeType.inherits("application/xhtml+xml")) {
         htmlEmbedder()->startEmbedding(m_data, m_mimeType.name());
     } else {
         emit ready();
     }
 }

 void WebEnginePartKIOHandler::kioJobFinished(KIO::StoredTransferJob* job)
 {
     m_error = job->error() == 0 ? QWebEngineUrlRequestJob::NoError : QWebEngineUrlRequestJob::RequestFailed;
-    m_errorMessage = isSuccessful() ? job->errorString() : QString();
+    m_errorMessage = isSuccessful() ? QString() : job->errorString();
     m_data = job->data();
     m_mimeType = QMimeDatabase().mimeTypeForData(m_data);
     processSlaveOutput();
 }

 WebEnginePartHtmlEmbedder * WebEnginePartKIOHandler::htmlEmbedder()
 {
     if (!m_embedder) {
         m_embedder = new WebEnginePartHtmlEmbedder(this);
         connect(htmlEmbedder(), &WebEnginePartHtmlEmbedder::finished, this, &WebEnginePartKIOHandler::embedderFinished);
     }
     return m_embedder;
 }