diff --git a/src/io/fileinfo.cpp b/src/io/fileinfo.cpp index 8666be6e..5f7a25b8 100644 --- a/src/io/fileinfo.cpp +++ b/src/io/fileinfo.cpp @@ -1,382 +1,370 @@ /*************************************************************************** - * Copyright (C) 2004-2018 by Thomas Fischer * + * Copyright (C) 2004-2019 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . * ***************************************************************************/ #include "fileinfo.h" #include #include #include #include #include #include #include -#include -#include - #include "kbibtex.h" #include "entry.h" #include "logging_io.h" FileInfo::FileInfo() { /// nothing } const QString FileInfo::mimetypeOctetStream = QStringLiteral("application/octet-stream"); const QString FileInfo::mimetypeHTML = QStringLiteral("text/html"); const QString FileInfo::mimetypeBibTeX = QStringLiteral("text/x-bibtex"); const QString FileInfo::mimetypeRIS = QStringLiteral("application/x-research-info-systems"); const QString FileInfo::mimetypePDF = QStringLiteral("application/pdf"); QMimeType FileInfo::mimeTypeForUrl(const QUrl &url) { if (!url.isValid() || url.isEmpty()) { qCWarning(LOG_KBIBTEX_IO) << "Cannot determine mime type for empty or invalid QUrl"; return QMimeType(); ///< invalid input gives invalid mime type } static const QMimeDatabase db; static const QMimeType mtHTML(db.mimeTypeForName(mimetypeHTML)); static const QMimeType mtOctetStream(db.mimeTypeForName(mimetypeOctetStream)); static const QMimeType mtBibTeX(db.mimeTypeForName(mimetypeBibTeX)); static const QMimeType mtPDF(db.mimeTypeForName(mimetypePDF)); static const QMimeType mtRIS(db.mimeTypeForName(mimetypeRIS)); /// Test if mime type for BibTeX is registered before determining file extension static const QString mimetypeBibTeXExt = mtBibTeX.preferredSuffix(); /// Test if mime type for RIS is registered before determining file extension static const QString mimetypeRISExt = mtRIS.preferredSuffix(); /// Test if mime type for PDF is registered before determining file extension static const QString mimetypePDFExt = mtPDF.preferredSuffix(); const QString extension = db.suffixForFileName(url.fileName()).toLower(); /// First, check preferred suffixes if (extension == mimetypeBibTeXExt) return mtBibTeX; else if (extension == mimetypeRISExt) return mtRIS; else if (extension == mimetypePDFExt) return mtPDF; /// Second, check any other suffixes else if (mtBibTeX.suffixes().contains(extension)) return mtBibTeX; else if (mtRIS.suffixes().contains(extension)) return mtRIS; else if (mtPDF.suffixes().contains(extension)) return mtPDF; /// Let the KDE subsystem guess the mime type QMimeType result = db.mimeTypeForUrl(url); /// Fall back to application/octet-stream if something goes wrong if (!result.isValid()) result = mtOctetStream; /// In case that KDE could not determine mime type, /// do some educated guesses on our own if (result.name() == mimetypeOctetStream) { if (url.scheme().startsWith(QStringLiteral("http"))) result = mtHTML; // TODO more tests? } return result; } void FileInfo::urlsInText(const QString &text, const TestExistence testExistence, const QString &baseDirectory, QSet &result) { if (text.isEmpty()) return; /// DOI identifiers have to extracted first as KBibTeX::fileListSeparatorRegExp /// contains characters that can be part of a DOI (e.g. ';') and thus could split /// a DOI in between. QString internalText = text; int pos = 0; QRegularExpressionMatch doiRegExpMatch; while ((doiRegExpMatch = KBibTeX::doiRegExp.match(internalText, pos)).hasMatch()) { pos = doiRegExpMatch.capturedStart(0); QString doiMatch = doiRegExpMatch.captured(0); const int semicolonHttpPos = doiMatch.indexOf(QStringLiteral(";http")); if (semicolonHttpPos > 0) doiMatch = doiMatch.left(semicolonHttpPos); - const QUrl url(doiUrlPrefix() + QString(doiMatch).remove(QStringLiteral("\\"))); + const QUrl url(KBibTeX::doiUrlPrefix + QString(doiMatch).remove(QStringLiteral("\\"))); if (url.isValid() && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates /// Cut away any URL that may be right before found DOI number: /// For example, if DOI '10.1000/38-abc' was found in /// 'Lore ipsum http://doi.example.org/10.1000/38-abc Lore ipsum' /// also remove 'http://doi.example.org/' from the text, keeping only /// 'Lore ipsum Lore ipsum' static const QRegularExpression genericDoiUrlPrefix(QStringLiteral("http[s]?://[a-z0-9./-]+/$")); ///< looks like an URL const QRegularExpressionMatch genericDoiUrlPrefixMatch = genericDoiUrlPrefix.match(internalText.left(pos)); if (genericDoiUrlPrefixMatch.hasMatch()) /// genericDoiUrlPrefixMatch.captured(0) may contain (parts of) DOI internalText = internalText.left(genericDoiUrlPrefixMatch.capturedStart(0)) + internalText.mid(pos + doiMatch.length()); else internalText = internalText.left(pos) + internalText.mid(pos + doiMatch.length()); } const QStringList fileList = internalText.split(KBibTeX::fileListSeparatorRegExp, QString::SkipEmptyParts); for (const QString &text : fileList) { internalText = text; /// If testing for the actual existence of a filename found in the text ... if (testExistence == TestExistenceYes) { /// If a base directory (e.g. the location of the parent .bib file) is given /// and the potential filename fragment is NOT an absolute path, ... if (internalText.startsWith(QStringLiteral("~") + QDir::separator())) { const QString fullFilename = QDir::homePath() + internalText.mid(1); const QFileInfo fileInfo(fullFilename); const QUrl url = QUrl::fromLocalFile(fileInfo.canonicalFilePath()); if (fileInfo.exists() && fileInfo.isFile() && url.isValid() && !result.contains(url)) { result << url; /// Stop searching for URLs or filenames in current internal text continue; } } else if (!baseDirectory.isEmpty() && // TODO the following test assumes that absolute paths start // with a dir separator, which may only be true on Unix/Linux, // but not Windows. May be a test for 'first character is a letter, // second is ":", third is "\"' may be necessary. !internalText.startsWith(QDir::separator())) { /// To get the absolute path, prepend filename fragment with base directory const QString fullFilename = baseDirectory + QDir::separator() + internalText; const QFileInfo fileInfo(fullFilename); const QUrl url = QUrl::fromLocalFile(fileInfo.canonicalFilePath()); if (fileInfo.exists() && fileInfo.isFile() && url.isValid() && !result.contains(url)) { result << url; /// Stop searching for URLs or filenames in current internal text continue; } } else { /// Either the filename fragment is an absolute path OR no base directory /// was given (current working directory is assumed), ... const QFileInfo fileInfo(internalText); const QUrl url = QUrl::fromLocalFile(fileInfo.canonicalFilePath()); if (fileInfo.exists() && fileInfo.isFile() && url.isValid() && !result.contains(url)) { result << url; /// stop searching for URLs or filenames in current internal text continue; } } } /// extract URL from current field pos = 0; QRegularExpressionMatch urlRegExpMatch; while ((urlRegExpMatch = KBibTeX::urlRegExp.match(internalText, pos)).hasMatch()) { pos = urlRegExpMatch.capturedStart(0); const QString match = urlRegExpMatch.captured(0); QUrl url(match); if (url.isValid() && (testExistence == TestExistenceNo || !url.isLocalFile() || QFileInfo::exists(url.toLocalFile())) && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates internalText = internalText.left(pos) + internalText.mid(pos + match.length()); } /// explicitly check URL entry, may be an URL even if http:// or alike is missing pos = 0; QRegularExpressionMatch domainNameRegExpMatch; while ((domainNameRegExpMatch = KBibTeX::domainNameRegExp.match(internalText, pos)).hasMatch()) { pos = domainNameRegExpMatch.capturedStart(0); int pos2 = internalText.indexOf(QStringLiteral(" "), pos + 1); if (pos2 < 0) pos2 = internalText.length(); QString match = internalText.mid(pos, pos2 - pos); const QUrl url(QStringLiteral("http://") + match); // FIXME what about HTTPS? if (url.isValid() && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates internalText = internalText.left(pos) + internalText.mid(pos + match.length()); } /// extract general file-like patterns pos = 0; QRegularExpressionMatch fileRegExpMatch; while ((fileRegExpMatch = KBibTeX::fileRegExp.match(internalText, pos)).hasMatch()) { pos = fileRegExpMatch.capturedStart(0); const QString match = fileRegExpMatch.captured(0); QUrl url(match); if (url.isValid() && (testExistence == TestExistenceNo || !url.isLocalFile() || QFileInfo::exists(url.toLocalFile())) && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates internalText = internalText.left(pos) + internalText.mid(pos + match.length()); } } } QSet FileInfo::entryUrls(const QSharedPointer &entry, const QUrl &bibTeXUrl, TestExistence testExistence) { QSet result; if (entry.isNull() || entry->isEmpty()) return result; if (entry->contains(Entry::ftDOI)) { const QString doi = PlainTextValue::text(entry->value(Entry::ftDOI)); QRegularExpressionMatch doiRegExpMatch; if (!doi.isEmpty() && (doiRegExpMatch = KBibTeX::doiRegExp.match(doi)).hasMatch()) { QString match = doiRegExpMatch.captured(0); - QUrl url(doiUrlPrefix() + match.remove(QStringLiteral("\\"))); + QUrl url(KBibTeX::doiUrlPrefix + match.remove(QStringLiteral("\\"))); result.insert(url); } } static const QString etPMID = QStringLiteral("pmid"); if (entry->contains(etPMID)) { const QString pmid = PlainTextValue::text(entry->value(etPMID)); bool ok = false; ok &= pmid.toInt(&ok) > 0; if (ok) { QUrl url(QStringLiteral("https://www.ncbi.nlm.nih.gov/pubmed/") + pmid); result.insert(url); } } static const QString etEPrint = QStringLiteral("eprint"); if (entry->contains(etEPrint)) { const QString eprint = PlainTextValue::text(entry->value(etEPrint)); if (!eprint.isEmpty()) { QUrl url(QStringLiteral("http://arxiv.org/search?query=") + eprint); result.insert(url); } } const QString baseDirectory = bibTeXUrl.isValid() ? bibTeXUrl.adjusted(QUrl::RemoveFilename | QUrl::StripTrailingSlash).path() : QString(); for (Entry::ConstIterator it = entry->constBegin(); it != entry->constEnd(); ++it) { /// skip abstracts, they contain sometimes strange text fragments /// that are mistaken for URLs if (it.key().toLower() == Entry::ftAbstract) continue; const Value v = it.value(); for (const auto &valueItem : v) { QString plainText = PlainTextValue::text(*valueItem); static const QRegularExpression regExpEscapedChars = QRegularExpression(QStringLiteral("\\\\+([&_~])")); plainText.replace(regExpEscapedChars, QStringLiteral("\\1")); urlsInText(plainText, testExistence, baseDirectory, result); } } if (!baseDirectory.isEmpty()) { /// File types supported by "document preview" static const QStringList documentFileExtensions {QStringLiteral(".pdf"), QStringLiteral(".pdf.gz"), QStringLiteral(".pdf.bz2"), QStringLiteral(".ps"), QStringLiteral(".ps.gz"), QStringLiteral(".ps.bz2"), QStringLiteral(".eps"), QStringLiteral(".eps.gz"), QStringLiteral(".eps.bz2"), QStringLiteral(".html"), QStringLiteral(".xhtml"), QStringLiteral(".htm"), QStringLiteral(".dvi"), QStringLiteral(".djvu"), QStringLiteral(".wwf"), QStringLiteral(".jpeg"), QStringLiteral(".jpg"), QStringLiteral(".png"), QStringLiteral(".gif"), QStringLiteral(".tif"), QStringLiteral(".tiff")}; result.reserve(result.size() + documentFileExtensions.size() * 2); /// check if in the same directory as the BibTeX file /// a PDF file exists which filename is based on the entry's id for (const QString &extension : documentFileExtensions) { const QFileInfo fi(baseDirectory + QDir::separator() + entry->id() + extension); if (fi.exists()) { const QUrl url = QUrl::fromLocalFile(fi.canonicalFilePath()); if (!result.contains(url)) result << url; } } /// check if in the same directory as the BibTeX file there is a subdirectory /// similar to the BibTeX file's name and which contains a PDF file exists /// which filename is based on the entry's id static const QRegularExpression filenameExtension(QStringLiteral("\\.[^.]{2,5}$")); const QString basename = bibTeXUrl.fileName().remove(filenameExtension); QString directory = baseDirectory + QDir::separator() + basename; for (const QString &extension : documentFileExtensions) { const QFileInfo fi(directory + QDir::separator() + entry->id() + extension); if (fi.exists()) { const QUrl url = QUrl::fromLocalFile(fi.canonicalFilePath()); if (!result.contains(url)) result << url; } } } return result; } QString FileInfo::pdfToText(const QString &pdfFilename) { /// Build filename for text file where PDF file's plain text is cached const QString cacheDirectory = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/pdftotext"); if (!QDir(cacheDirectory).exists() && !QDir::home().mkdir(cacheDirectory)) /// Could not create cache directory return QString(); static const QRegularExpression invalidChars(QStringLiteral("[^-a-z0-9_]"), QRegularExpression::CaseInsensitiveOption); const QString textFilename = QString(pdfFilename).remove(invalidChars).append(QStringLiteral(".txt")).prepend(QStringLiteral("/")).prepend(cacheDirectory); /// First, check if there is a cache text file if (QFileInfo::exists(textFilename)) { /// Load text from cache file QFile f(textFilename); if (f.open(QFile::ReadOnly)) { const QString text = QString::fromUtf8(f.readAll()); f.close(); return text; } } else /// No cache file exists, so run text extraction in another thread QtConcurrent::run(extractPDFTextToCache, pdfFilename, textFilename); return QString(); } void FileInfo::extractPDFTextToCache(const QString &pdfFilename, const QString &cacheFilename) { /// In case of multiple calls, skip text extraction if cache file already exists if (QFile(cacheFilename).exists()) return; QString text; QStringList msgList; /// Load PDF file through Poppler Poppler::Document *doc = Poppler::Document::load(pdfFilename); if (doc != nullptr) { static const int maxPages = 64; /// Build text by appending each page's text for (int i = 0; i < qMin(maxPages, doc->numPages()); ++i) text.append(doc->page(i)->text(QRect())).append(QStringLiteral("\n\n")); if (doc->numPages() > maxPages) msgList << QString(QStringLiteral("### Skipped %1 pages as PDF file contained too many pages (limit is %2 pages) ###")).arg(doc->numPages() - maxPages).arg(maxPages); delete doc; } else msgList << QStringLiteral("### Skipped as file could not be opened as PDF file ###"); /// Save text in cache file QFile f(cacheFilename); if (f.open(QFile::WriteOnly)) { static const int maxCharacters = 1 << 18; f.write(text.left(maxCharacters).toUtf8()); ///< keep only the first 2^18 many characters if (text.length() > maxCharacters) msgList << QString(QStringLiteral("### Text too long, skipping %1 characters ###")).arg(text.length() - maxCharacters); /// Write all messages (warnings) to end of text file for (const QString &msg : const_cast(msgList)) { static const char linebreak = '\n'; f.write(&linebreak, 1); f.write(msg.toUtf8()); } f.close(); } } - -QString FileInfo::doiUrlPrefix() -{ - KSharedConfigPtr config(KSharedConfig::openConfig(QStringLiteral("kbibtexrc"))); - static const QString configGroupNameNetworking(QStringLiteral("Networking")); - static const QString keyDOIUrlPrefix(QStringLiteral("DOIUrlPrefix")); - KConfigGroup configGroup(config, configGroupNameNetworking); - return configGroup.readEntry(keyDOIUrlPrefix, KBibTeX::doiUrlPrefix); -} diff --git a/src/io/fileinfo.h b/src/io/fileinfo.h index fb8d079c..116f272b 100644 --- a/src/io/fileinfo.h +++ b/src/io/fileinfo.h @@ -1,106 +1,104 @@ /*************************************************************************** - * Copyright (C) 2004-2017 by Thomas Fischer * + * Copyright (C) 2004-2019 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . * ***************************************************************************/ #ifndef KBIBTEX_IO_FILEINFO_H #define KBIBTEX_IO_FILEINFO_H #include "kbibtexio_export.h" #include #include #include #include #include class Entry; class KBIBTEXIO_EXPORT FileInfo { public: static const QString mimetypeOctetStream; static const QString mimetypeHTML; static const QString mimetypeBibTeX; static const QString mimetypeRIS; static const QString mimetypePDF; enum TestExistence { TestExistenceYes, ///< Test if file exists TestExistenceNo ///< Skip test if file exists }; /** * Finds a QMimeType with the given url. * Tries to guess a file's mime type by its extension first, * but falls back to QMimeType's mimeTypeForName if that does * not work. Background: If a HTTP or WebDAV server claims * that a .bib file is of mime type application/octet-stream, * QMimeType::mimeTypeForName will keep that assessment * instead of inspecting the file extension. * * @see QMimeType::mimeTypeForName * @param url Url to analyze * @return Guessed mime type */ static QMimeType mimeTypeForUrl(const QUrl &url); /** * Find all file or URL references in the given text. Found filenames or * URLs are appended to the addTo list (duplicates are avoided). * Different test may get performed depending of the test for existence * of a potential file should be checked or not checked or if this matter * is undecided/irrelevant (recommended default case). For the test of * existence, baseDirectory is used to resolve relative paths. * @param text text to scan for filenames or URLs * @param testExistence shall be tested for file existence? * @param baseDirectory base directory for tests on relative path names * @param addTo add found URLs/filenames to this list */ static void urlsInText(const QString &text, const TestExistence testExistence, const QString &baseDirectory, QSet &addTo); /** * Find all file or URL references in the given entry. Found filenames or * URLs are appended to the addTo list (duplicates are avoided). * Different test may get performed depending of the test for existence * of a potential file should be checked or not checked or if this matter * is undecided/irrelevant (recommended default case). For the test of * existence, bibTeXUrl is used to resolve relative paths. * @param entry entry to scan for filenames or URLs * @param bibTeXUrl base directory/URL for tests on relative path names * @param testExistence shall be tested for file existence? * @return list of found URLs/filenames (duplicates are avoided) */ static QSet entryUrls(const QSharedPointer &entry, const QUrl &bibTeXUrl, TestExistence testExistence); /** * Load the given PDF file and return the contained plain text. * Makes use of Poppler to load and parse the file. All text * will be cached and loaded from cache if possible. * @param pdfFilename PDF file to load and extract text from * @return extracted plain text, either directly from PDF file or from cache OR QString() if there was an error */ static QString pdfToText(const QString &pdfFilename); - static QString doiUrlPrefix(); - protected: FileInfo(); private: static void extractPDFTextToCache(const QString &pdfFilename, const QString &cacheFilename); }; #endif // KBIBTEX_IO_FILEINFO_H diff --git a/src/networking/findpdf.cpp b/src/networking/findpdf.cpp index f09c9b6e..a9ee0091 100644 --- a/src/networking/findpdf.cpp +++ b/src/networking/findpdf.cpp @@ -1,455 +1,455 @@ /*************************************************************************** - * Copyright (C) 2004-2018 by Thomas Fischer * + * Copyright (C) 2004-2019 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . * ***************************************************************************/ #include "findpdf.h" #include #include #include #include #include #include #include #include #include #include "kbibtex.h" #include "internalnetworkaccessmanager.h" #include "value.h" #include "fileinfo.h" #include "logging_networking.h" int maxDepth = 5; static const char *depthProperty = "depth"; static const char *termProperty = "term"; static const char *originProperty = "origin"; class FindPDF::Private { private: FindPDF *p; public: int aliveCounter; QList result; Entry currentEntry; QSet knownUrls; QSet runningDownloads; Private(FindPDF *parent) : p(parent), aliveCounter(0) { /// nothing } bool queueUrl(const QUrl &url, const QString &term, const QString &origin, int depth) { if (!knownUrls.contains(url) && depth > 0) { knownUrls.insert(url); QNetworkRequest request = QNetworkRequest(url); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply, 15); ///< set a timeout on network connections reply->setProperty(depthProperty, QVariant::fromValue(depth)); reply->setProperty(termProperty, term); reply->setProperty(originProperty, origin); runningDownloads.insert(reply); connect(reply, &QNetworkReply::finished, p, &FindPDF::downloadFinished); ++aliveCounter; return true; } else return false; } void processGeneralHTML(QNetworkReply *reply, const QString &text) { /// fetch some properties from Reply object const QString term = reply->property(termProperty).toString(); const QString origin = reply->property(originProperty).toString(); bool ok = false; int depth = reply->property(depthProperty).toInt(&ok); if (!ok) depth = 0; /// regular expressions to guess links to follow const QVector specificAnchorRegExp = { QRegularExpression(QString(QStringLiteral("]*href=\"([^\"]*%1[^\"]*[.]pdf)\"")).arg(QRegularExpression::escape(term)), QRegularExpression::CaseInsensitiveOption), QRegularExpression(QString(QStringLiteral("]*href=\"([^\"]+)\"[^>]*>[^<]*%1[^<]*[.]pdf")).arg(QRegularExpression::escape(term)), QRegularExpression::CaseInsensitiveOption), QRegularExpression(QString(QStringLiteral("]*href=\"([^\"]*%1[^\"]*)\"")).arg(QRegularExpression::escape(term)), QRegularExpression::CaseInsensitiveOption), QRegularExpression(QString(QStringLiteral("]*href=\"([^\"]+)\"[^>]*>[^<]*%1[^<]*\\b")).arg(QRegularExpression::escape(term)), QRegularExpression::CaseInsensitiveOption) }; static const QRegularExpression genericAnchorRegExp = QRegularExpression(QStringLiteral("]*href=\"([^\"]+)\""), QRegularExpression::CaseInsensitiveOption); bool gotLink = false; for (const QRegularExpression &anchorRegExp : specificAnchorRegExp) { const QRegularExpressionMatch match = anchorRegExp.match(text); if (match.hasMatch()) { const QUrl url = QUrl::fromEncoded(match.captured(1).toLatin1()); queueUrl(reply->url().resolved(url), term, origin, depth - 1); gotLink = true; break; } } if (!gotLink) { /// this is only the last resort: /// to follow the first link found in the HTML document const QRegularExpressionMatch match = genericAnchorRegExp.match(text); if (match.hasMatch()) { const QUrl url = QUrl::fromEncoded(match.captured(1).toLatin1()); queueUrl(reply->url().resolved(url), term, origin, depth - 1); } } } void processGoogleResult(QNetworkReply *reply, const QString &text) { static const QString h3Tag(QStringLiteral("property(termProperty).toString(); bool ok = false; int depth = reply->property(depthProperty).toInt(&ok); if (!ok) depth = 0; /// extract the first numHitsToFollow-many hits found by Google Scholar const int numHitsToFollow = 10; int p = -1; for (int i = 0; i < numHitsToFollow; ++i) { if ((p = text.indexOf(h3Tag, p + 1)) >= 0 && (p = text.indexOf(aTag, p + 1)) >= 0 && (p = text.indexOf(hrefAttrib, p + 1)) >= 0) { int p1 = p + 6; int p2 = text.indexOf(QLatin1Char('"'), p1 + 1); QUrl url(text.mid(p1, p2 - p1)); const QString googleService = reply->url().host().contains(QStringLiteral("scholar.google")) ? QStringLiteral("scholar.google") : QStringLiteral("www.google"); queueUrl(reply->url().resolved(url), term, googleService, depth - 1); } } } void processSpringerLink(QNetworkReply *reply, const QString &text) { static const QRegularExpression fulltextPDFlink(QStringLiteral("href=\"([^\"]+/fulltext.pdf)\"")); const QRegularExpressionMatch match = fulltextPDFlink.match(text); if (match.hasMatch()) { bool ok = false; int depth = reply->property(depthProperty).toInt(&ok); if (!ok) depth = 0; const QUrl url(match.captured(1)); queueUrl(reply->url().resolved(url), QString(), QStringLiteral("springerlink"), depth - 1); } } void processCiteSeerX(QNetworkReply *reply, const QString &text) { static const QRegularExpression downloadPDFlink(QStringLiteral("href=\"(/viewdoc/download[^\"]+type=pdf)\"")); const QRegularExpressionMatch match = downloadPDFlink.match(text); if (match.hasMatch()) { bool ok = false; int depth = reply->property(depthProperty).toInt(&ok); if (!ok) depth = 0; const QUrl url = QUrl::fromEncoded(match.captured(1).toLatin1()); queueUrl(reply->url().resolved(url), QString(), QStringLiteral("citeseerx"), depth - 1); } } void processACMDigitalLibrary(QNetworkReply *reply, const QString &text) { static const QRegularExpression downloadPDFlink(QStringLiteral("href=\"(ft_gateway.cfm\\?id=\\d+&ftid=\\d+&dwn=1&CFID=\\d+&CFTOKEN=\\d+)\"")); const QRegularExpressionMatch match = downloadPDFlink.match(text); if (match.hasMatch()) { bool ok = false; int depth = reply->property(depthProperty).toInt(&ok); if (!ok) depth = 0; const QUrl url = QUrl::fromEncoded(match.captured(1).toLatin1()); queueUrl(reply->url().resolved(url), QString(), QStringLiteral("acmdl"), depth - 1); } } bool processPDF(QNetworkReply *reply, const QByteArray &data) { bool progress = false; const QString origin = reply->property(originProperty).toString(); const QUrl url = reply->url(); /// Search for duplicate URLs bool containsUrl = false; for (const ResultItem &ri : const_cast &>(result)) { containsUrl |= ri.url == url; /// Skip already visited URLs if (containsUrl) break; } if (!containsUrl) { Poppler::Document *doc = Poppler::Document::loadFromData(data); ResultItem resultItem; resultItem.tempFilename = new QTemporaryFile(QStandardPaths::writableLocation(QStandardPaths::TempLocation) + QDir::separator() + QStringLiteral("kbibtex_findpdf_XXXXXX.pdf")); resultItem.tempFilename->setAutoRemove(true); if (resultItem.tempFilename->open()) { const int lenDataWritten = resultItem.tempFilename->write(data); resultItem.tempFilename->close(); if (lenDataWritten != data.length()) { /// Failed to write to temporary file qCWarning(LOG_KBIBTEX_NETWORKING) << "Failed to write to temporary file for filename" << resultItem.tempFilename->fileName(); delete resultItem.tempFilename; resultItem.tempFilename = nullptr; } } else { /// Failed to create temporary file qCWarning(LOG_KBIBTEX_NETWORKING) << "Failed to create temporary file for templaet" << resultItem.tempFilename->fileTemplate(); delete resultItem.tempFilename; resultItem.tempFilename = nullptr; } resultItem.url = url; resultItem.textPreview = doc->info(QStringLiteral("Title")).simplified(); static const int maxTextLen = 1024; for (int i = 0; i < doc->numPages() && resultItem.textPreview.length() < maxTextLen; ++i) { Poppler::Page *page = doc->page(i); if (!resultItem.textPreview.isEmpty()) resultItem.textPreview += QLatin1Char(' '); resultItem.textPreview += page->text(QRect()).simplified().leftRef(maxTextLen); delete page; } resultItem.textPreview.remove(QStringLiteral("Microsoft Word - ")); ///< Some word processors need to put their name everywhere ... resultItem.downloadMode = NoDownload; resultItem.relevance = origin == Entry::ftDOI ? 1.0 : (origin == QStringLiteral("eprint") ? 0.75 : 0.5); result << resultItem; progress = true; delete doc; } return progress; } QUrl ieeeDocumentUrlToDownloadUrl(const QUrl &url) { /// Basic checking if provided URL is from IEEE Xplore if (!url.host().contains(QStringLiteral("ieeexplore.ieee.org"))) return url; /// Assuming URL looks like this: /// http://ieeexplore.ieee.org/document/8092651/ static const QRegularExpression documentIdRegExp(QStringLiteral("/(\\d{6,})/$")); const QRegularExpressionMatch documentIdRegExpMatch = documentIdRegExp.match(url.path()); if (!documentIdRegExpMatch.hasMatch()) return url; /// Use document id extracted above to build URL to PDF file return QUrl(QStringLiteral("http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=") + documentIdRegExpMatch.captured(1)); } }; FindPDF::FindPDF(QObject *parent) : QObject(parent), d(new Private(this)) { /// nothing } FindPDF::~FindPDF() { abort(); delete d; } bool FindPDF::search(const Entry &entry) { if (d->aliveCounter > 0) return false; d->knownUrls.clear(); d->result.clear(); d->currentEntry = entry; emit progress(0, d->aliveCounter, 0); /// Generate a string which contains the title's beginning QString searchWords; if (entry.contains(Entry::ftTitle)) { const QStringList titleChunks = PlainTextValue::text(entry.value(Entry::ftTitle)).split(QStringLiteral(" "), QString::SkipEmptyParts); if (!titleChunks.isEmpty()) { searchWords = titleChunks[0]; for (int i = 1; i < titleChunks.count() && searchWords.length() < 64; ++i) searchWords += QLatin1Char(' ') + titleChunks[i]; } } const QStringList authors = entry.authorsLastName(); for (int i = 0; i < authors.count() && searchWords.length() < 96; ++i) searchWords += QLatin1Char(' ') + authors[i]; searchWords.remove(QLatin1Char('{')).remove(QLatin1Char('}')); QStringList urlFields {Entry::ftDOI, Entry::ftUrl, QStringLiteral("ee")}; for (int i = 2; i < 256; ++i) urlFields << QString(QStringLiteral("%1%2")).arg(Entry::ftDOI).arg(i) << QString(QStringLiteral("%1%2")).arg(Entry::ftUrl).arg(i); for (const QString &field : const_cast(urlFields)) { if (entry.contains(field)) { const QString fieldText = PlainTextValue::text(entry.value(field)); QRegularExpressionMatchIterator doiRegExpMatchIt = KBibTeX::doiRegExp.globalMatch(fieldText); while (doiRegExpMatchIt.hasNext()) { const QRegularExpressionMatch doiRegExpMatch = doiRegExpMatchIt.next(); - d->queueUrl(QUrl(FileInfo::doiUrlPrefix() + doiRegExpMatch.captured(0)), fieldText, Entry::ftDOI, maxDepth); + d->queueUrl(QUrl(KBibTeX::doiUrlPrefix + doiRegExpMatch.captured(0)), fieldText, Entry::ftDOI, maxDepth); } QRegularExpressionMatchIterator urlRegExpMatchIt = KBibTeX::urlRegExp.globalMatch(fieldText); while (urlRegExpMatchIt.hasNext()) { QRegularExpressionMatch urlRegExpMatch = urlRegExpMatchIt.next(); d->queueUrl(QUrl(urlRegExpMatch.captured(0)), searchWords, Entry::ftUrl, maxDepth); } } } if (entry.contains(QStringLiteral("eprint"))) { /// check eprint fields as used for arXiv const QString eprintId = PlainTextValue::text(entry.value(QStringLiteral("eprint"))); if (!eprintId.isEmpty()) { const QUrl arxivUrl = QUrl::fromUserInput(QStringLiteral("http://arxiv.org/find/all/1/all:+") + eprintId + QStringLiteral("/0/1/0/all/0/1")); d->queueUrl(arxivUrl, eprintId, QStringLiteral("eprint"), maxDepth); } } if (!searchWords.isEmpty()) { /// Search in Google const QUrl googleUrl = QUrl::fromUserInput(QStringLiteral("https://www.google.com/search?hl=en&sa=G&q=filetype:pdf ") + searchWords); d->queueUrl(googleUrl, searchWords, QStringLiteral("www.google"), maxDepth); /// Search in Google Scholar const QUrl googleScholarUrl = QUrl::fromUserInput(QStringLiteral("https://scholar.google.com/scholar?hl=en&btnG=Search&as_sdt=1&q=filetype:pdf ") + searchWords); d->queueUrl(googleScholarUrl, searchWords, QStringLiteral("scholar.google"), maxDepth); /// Search in Bing const QUrl bingUrl = QUrl::fromUserInput(QStringLiteral("https://www.bing.com/search?setlang=en-US&q=filetype:pdf ") + searchWords); d->queueUrl(bingUrl, searchWords, QStringLiteral("bing"), maxDepth); /// Search in CiteSeerX const QUrl citeseerXurl = QUrl::fromUserInput(QStringLiteral("http://citeseerx.ist.psu.edu/search?submit=Search&sort=rlv&t=doc&q=") + searchWords); d->queueUrl(citeseerXurl, searchWords, QStringLiteral("citeseerx"), maxDepth); /// Search in StartPage const QUrl startPageUrl = QUrl::fromUserInput(QStringLiteral("https://www.startpage.com/do/asearch?cat=web&cmd=process_search&language=english&engine0=v1all&abp=-1&t=white&nj=1&prf=23ad6aab054a88d3da5c443280cee596&suggestOn=0&query=filetype:pdf ") + searchWords); d->queueUrl(startPageUrl, searchWords, QStringLiteral("startpage"), maxDepth); } if (d->aliveCounter == 0) { qCWarning(LOG_KBIBTEX_NETWORKING) << "Directly at start, no URLs are queue for a search -> this should never happen"; emit finished(); } return true; } QList FindPDF::results() { if (d->aliveCounter == 0) return d->result; else { /// Return empty list while search is running return QList(); } } void FindPDF::abort() { QSet::Iterator it = d->runningDownloads.begin(); while (it != d->runningDownloads.end()) { QNetworkReply *reply = *it; it = d->runningDownloads.erase(it); reply->abort(); } } void FindPDF::downloadFinished() { static const char *htmlHead1 = "aliveCounter; emit progress(d->knownUrls.count(), d->aliveCounter, d->result.count()); QNetworkReply *reply = static_cast(sender()); d->runningDownloads.remove(reply); const QString term = reply->property(termProperty).toString(); const QString origin = reply->property(originProperty).toString(); bool depthOk = false; int depth = reply->property(depthProperty).toInt(&depthOk); if (!depthOk) depth = 0; if (reply->error() == QNetworkReply::NoError) { const QByteArray data = reply->readAll(); QUrl redirUrl = reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl(); redirUrl = redirUrl.isEmpty() ? QUrl() : reply->url().resolved(redirUrl); qCDebug(LOG_KBIBTEX_NETWORKING) << "finished Downloading " << reply->url().toDisplayString() << " depth=" << depth << " d->aliveCounter=" << d->aliveCounter << " data.size=" << data.size() << " redirUrl=" << redirUrl.toDisplayString() << " origin=" << origin; if (!redirUrl.isEmpty()) { redirUrl = d->ieeeDocumentUrlToDownloadUrl(redirUrl); d->queueUrl(redirUrl, term, origin, depth - 1); } else if (data.contains(htmlHead1) || data.contains(htmlHead2) || data.contains(htmlHead3)) { /// returned data is a HTML file, i.e. contains " 0) { /// Get webpage as plain text /// Assume UTF-8 data const QString text = QString::fromUtf8(data.constData()); /// regular expression to check if this is a Google Scholar result page static const QRegularExpression googleScholarTitleRegExp(QStringLiteral("[^>]* - Google Scholar")); /// regular expression to check if this is a SpringerLink page static const QRegularExpression springerLinkTitleRegExp(QStringLiteral("[^>]* - Springer - [^>]*")); /// regular expression to check if this is a CiteSeerX page static const QRegularExpression citeseerxTitleRegExp(QStringLiteral("CiteSeerX — [^>]*")); /// regular expression to check if this is a ACM Digital Library page static const QString acmDigitalLibraryString(QStringLiteral("The ACM Digital Library is published by the Association for Computing Machinery")); if (googleScholarTitleRegExp.match(text).hasMatch()) d->processGoogleResult(reply, text); else if (springerLinkTitleRegExp.match(text).hasMatch()) d->processSpringerLink(reply, text); else if (citeseerxTitleRegExp.match(text).hasMatch()) d->processCiteSeerX(reply, text); else if (text.contains(acmDigitalLibraryString)) d->processACMDigitalLibrary(reply, text); else { /// regular expression to extract title static const QRegularExpression titleRegExp(QStringLiteral("(.*?)")); const QRegularExpressionMatch match = titleRegExp.match(text); if (match.hasMatch()) qCDebug(LOG_KBIBTEX_NETWORKING) << "Using general HTML processor for page" << match.captured(1) << " URL=" << reply->url().toDisplayString(); else qCDebug(LOG_KBIBTEX_NETWORKING) << "Using general HTML processor for URL=" << reply->url().toDisplayString(); d->processGeneralHTML(reply, text); } } } else if (data.contains(pdfHead)) { /// looks like a PDF file -> grab it const bool gotPDFfile = d->processPDF(reply, data); if (gotPDFfile) emit progress(d->knownUrls.count(), d->aliveCounter, d->result.count()); } else { /// Assume UTF-8 data const QString text = QString::fromUtf8(data.constData()); qCWarning(LOG_KBIBTEX_NETWORKING) << "don't know how to handle " << text.left(256); } } else qCWarning(LOG_KBIBTEX_NETWORKING) << "error from reply: " << reply->errorString() << "(" << reply->url().toDisplayString() << ")" << " term=" << term << " origin=" << origin << " depth=" << depth; if (d->aliveCounter == 0) { /// no more running downloads left emit finished(); } } diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 0127a7ca..e2738677 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1,186 +1,187 @@ # KBibTeX test program project( test ) include( AddFileDependencies ) include( ECMMarkAsTest ) configure_file(test-config.h.in test-config.h @ONLY) include_directories( ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_SOURCE_DIR}/src/config ${CMAKE_SOURCE_DIR}/src/data ${CMAKE_SOURCE_DIR}/src/io ${CMAKE_SOURCE_DIR}/src/global ${CMAKE_SOURCE_DIR}/src/gui ${CMAKE_SOURCE_DIR}/src/gui/config ${CMAKE_SOURCE_DIR}/src/gui/bibtex ${CMAKE_SOURCE_DIR}/src/gui/element ${CMAKE_SOURCE_DIR}/src/gui/widgets ${CMAKE_SOURCE_DIR}/src/networking ${CMAKE_SOURCE_DIR}/src/networking/onlinesearch ${CMAKE_SOURCE_DIR}/src/processing ) set( kbibtextest_SRCS main.cpp kbibtextest.cpp logging_test.cpp ) set( kbibtexfilestest_SRCS kbibtexfilestest.cpp kbibtexfilestest-rawdata.h ) set( kbibtexnetworkingtest_SRCS kbibtexnetworkingtest.cpp ) set( kbibtexiotest_SRCS kbibtexiotest.cpp + ${CMAKE_SOURCE_DIR}/src/global/kbibtex.cpp ${CMAKE_SOURCE_DIR}/src/global/preferences.cpp ) set( kbibtexdatatest_SRCS kbibtexdatatest.cpp ) if(UNITY_BUILD AND NOT WIN32) # FIXME: Unity build of programs breaks on Windows enable_unity_build(kbibtextest kbibtextest_SRCS) enable_unity_build(kbibtexfilestest kbibtexfilestest_SRCS) enable_unity_build(kbibtexnetworkingtest kbibtexnetworkingtest_SRCS) enable_unity_build(kbibtexiotest kbibtexiotest_SRCS) enable_unity_build(kbibtexdatatest kbibtexdatatest_SRCS) endif(UNITY_BUILD AND NOT WIN32) # Creates kbibtex-git-info.h containing information about the source code's Git revision # (if source directory is a Git clone) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h COMMAND ${CMAKE_COMMAND} -DSOURCE_DIR=${CMAKE_SOURCE_DIR} -DBINARY_DIR=${CMAKE_CURRENT_BINARY_DIR} -P ${CMAKE_SOURCE_DIR}/src/getgit.cmake ) set_source_files_properties( ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h PROPERTIES GENERATED 1 HEADER_FILE_ONLY 1 SKIP_AUTOMOC ON SKIP_AUTOUIC ON SKIP_AUTOGEN ON ) add_executable( kbibtextest ${kbibtextest_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h ) add_executable( kbibtexfilestest ${kbibtexfilestest_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h ) add_executable( kbibtexnetworkingtest ${kbibtexnetworkingtest_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h ) add_executable( kbibtexiotest ${kbibtexiotest_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h ) add_executable( kbibtexdatatest ${kbibtexdatatest_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/kbibtex-git-info.h ) target_link_libraries( kbibtextest Qt5::Core KF5::KIOCore kbibtexconfig kbibtexdata kbibtexio kbibtexproc kbibtexgui kbibtexnetworking ) target_link_libraries( kbibtexfilestest Qt5::Test kbibtexdata kbibtexio ) target_link_libraries( kbibtexnetworkingtest Qt5::Test kbibtexnetworking ) target_link_libraries( kbibtexiotest Qt5::Test kbibtexio ) target_link_libraries( kbibtexdatatest Qt5::Test kbibtexdata ) ecm_mark_as_test( kbibtexfilestest kbibtexnetworkingtest kbibtexiotest kbibtexdatatest ) add_test( NAME kbibtexfilestest COMMAND kbibtexfilestest ) add_test( NAME kbibtexnetworkingtest COMMAND kbibtexnetworkingtest ) add_test( NAME kbibtexiotest COMMAND kbibtexiotest ) add_test( NAME kbibtexdatatest COMMAND kbibtexdatatest ) diff --git a/src/test/kbibtexiotest.cpp b/src/test/kbibtexiotest.cpp index 6af4e03b..7403ebd8 100644 --- a/src/test/kbibtexiotest.cpp +++ b/src/test/kbibtexiotest.cpp @@ -1,611 +1,612 @@ /*************************************************************************** - * Copyright (C) 2004-2018 by Thomas Fischer * + * Copyright (C) 2004-2019 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . * ***************************************************************************/ #include #include #include "encoderxml.h" #include "encoderlatex.h" #include "value.h" #include "entry.h" #include "fileexporterbibtex.h" #include "fileexporterris.h" #include "fileexporterxml.h" #include "file.h" #include "fileimporterbibtex.h" #include "fileimporter.h" #include "fileimporterris.h" #include "fileinfo.h" +#include "kbibtex.h" #include "preferences.h" Q_DECLARE_METATYPE(QMimeType) class KBibTeXIOTest : public QObject { Q_OBJECT private slots: void initTestCase(); void encoderXMLdecode_data(); void encoderXMLdecode(); void encoderXMLencode_data(); void encoderXMLencode(); void encoderLaTeXdecode_data(); void encoderLaTeXdecode(); void encoderLaTeXencode_data(); void encoderLaTeXencode(); void fileImporterSplitName_data(); void fileImporterSplitName(); void fileInfoMimeTypeForUrl_data(); void fileInfoMimeTypeForUrl(); void fileInfoUrlsInText_data(); void fileInfoUrlsInText(); QVector > fileImporterExporterTestCases(); void fileExporterXMLsave_data(); void fileExporterXMLsave(); void fileExporterRISsave_data(); void fileExporterRISsave(); void fileExporterBibTeXsave_data(); void fileExporterBibTeXsave(); void fileImporterRISload_data(); void fileImporterRISload(); void fileImporterBibTeXload_data(); void fileImporterBibTeXload(); void protectiveCasingEntryGeneratedOnTheFly(); void protectiveCasingEntryFromData(); void partialBibTeXInput_data(); void partialBibTeXInput(); void partialRISInput_data(); void partialRISInput(); private: }; void KBibTeXIOTest::encoderXMLdecode_data() { QTest::addColumn("xml"); QTest::addColumn("unicode"); QTest::newRow("Just ASCII") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur."); QTest::newRow("Quotation marks") << QStringLiteral("Caesar said: "Veni, vidi, vici"") << QStringLiteral("Caesar said: \"Veni, vidi, vici\""); QTest::newRow("Characters from EncoderXMLCharMapping") << QStringLiteral(""&<>") << QStringLiteral("\"\\&<>"); QTest::newRow("Characters from backslashSymbols") << QStringLiteral("&%_") << QStringLiteral("\\&\\%\\_"); for (int start = 0; start < 10; ++start) { QString xmlString, unicodeString; for (int offset = 1561; offset < 6791; offset += 621) { const ushort unicode = start * 3671 + offset; xmlString += QStringLiteral("&#") + QString::number(unicode) + QStringLiteral(";"); unicodeString += QChar(unicode); } QTest::newRow(QString(QStringLiteral("Some arbitrary Unicode characters (%1)")).arg(start).toLatin1().constData()) << xmlString << unicodeString; } } void KBibTeXIOTest::encoderXMLdecode() { QFETCH(QString, xml); QFETCH(QString, unicode); QCOMPARE(EncoderXML::instance().decode(xml), unicode); } void KBibTeXIOTest::encoderXMLencode_data() { encoderXMLdecode_data(); } void KBibTeXIOTest::encoderXMLencode() { QFETCH(QString, xml); QFETCH(QString, unicode); QCOMPARE(EncoderXML::instance().encode(unicode, Encoder::TargetEncodingASCII), xml); } void KBibTeXIOTest::encoderLaTeXdecode_data() { QTest::addColumn("latex"); QTest::addColumn("unicode"); QTest::addColumn("alternativelatex"); QTest::newRow("Just ASCII") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QString(); QTest::newRow("Dotless i and j characters") << QStringLiteral("\\`{\\i}\\'{\\i}\\^{\\i}\\\"{\\i}\\~{\\i}\\={\\i}\\u{\\i}\\k{\\i}\\^{\\j}\\v{\\i}\\v{\\j}") << QString(QChar(0x00EC)) + QChar(0x00ED) + QChar(0x00EE) + QChar(0x00EF) + QChar(0x0129) + QChar(0x012B) + QChar(0x012D) + QChar(0x012F) + QChar(0x0135) + QChar(0x01D0) + QChar(0x01F0) << QString(); QTest::newRow("\\l and \\ldots") << QStringLiteral("\\l\\ldots\\l\\ldots") << QString(QChar(0x0142)) + QChar(0x2026) + QChar(0x0142) + QChar(0x2026) << QStringLiteral("{\\l}{\\ldots}{\\l}{\\ldots}"); } void KBibTeXIOTest::encoderLaTeXdecode() { QFETCH(QString, latex); QFETCH(QString, unicode); QCOMPARE(EncoderLaTeX::instance().decode(latex), unicode); } void KBibTeXIOTest::encoderLaTeXencode_data() { encoderLaTeXdecode_data(); } void KBibTeXIOTest::encoderLaTeXencode() { QFETCH(QString, latex); QFETCH(QString, unicode); QFETCH(QString, alternativelatex); const QString generatedLatex = EncoderLaTeX::instance().encode(unicode, Encoder::TargetEncodingASCII); if (generatedLatex != latex && !alternativelatex.isEmpty()) QCOMPARE(generatedLatex, alternativelatex); else QCOMPARE(generatedLatex, latex); } void KBibTeXIOTest::fileImporterSplitName_data() { QTest::addColumn("name"); QTest::addColumn("person"); QTest::newRow("Empty name") << QString() << new Person(QString(), QString(), QString()); QTest::newRow("PubMed style") << QStringLiteral("Jones A B C") << new Person(QStringLiteral("A B C"), QStringLiteral("Jones"), QString()); QTest::newRow("Just last name") << QStringLiteral("Dido") << new Person(QString(), QStringLiteral("Dido"), QString()); QTest::newRow("Name with 'von'") << QStringLiteral("Theodor von Sickel") << new Person(QStringLiteral("Theodor"), QStringLiteral("von Sickel"), QString()); QTest::newRow("Name with 'von', reversed") << QStringLiteral("von Sickel, Theodor") << new Person(QStringLiteral("Theodor"), QStringLiteral("von Sickel"), QString()); QTest::newRow("Name with 'van der'") << QStringLiteral("Adriaen van der Werff") << new Person(QStringLiteral("Adriaen"), QStringLiteral("van der Werff"), QString()); QTest::newRow("Name with 'van der', reversed") << QStringLiteral("van der Werff, Adriaen") << new Person(QStringLiteral("Adriaen"), QStringLiteral("van der Werff"), QString()); QTest::newRow("Name with suffix") << QStringLiteral("Anna Eleanor Roosevelt Jr.") << new Person(QStringLiteral("Anna Eleanor"), QStringLiteral("Roosevelt"), QStringLiteral("Jr.")); } void KBibTeXIOTest::fileImporterSplitName() { QFETCH(QString, name); QFETCH(Person *, person); Person *computedPerson = FileImporter::splitName(name); QCOMPARE(*computedPerson, *person); delete person; delete computedPerson; } void KBibTeXIOTest::fileInfoMimeTypeForUrl_data() { QTest::addColumn("url"); QTest::addColumn("mimetype"); static const QMimeDatabase db; QTest::newRow("Invalid URL") << QUrl() << QMimeType(); QTest::newRow("Generic URL") << QUrl(QStringLiteral("https://www.example.com")) << db.mimeTypeForName(QStringLiteral("text/html")); QTest::newRow("Generic local file") << QUrl(QStringLiteral("/usr/bin/who")) << db.mimeTypeForName(QStringLiteral("application/octet-stream")); QTest::newRow("Generic Samba URL") << QUrl(QStringLiteral("smb://fileserver.local/file")) << db.mimeTypeForName(QStringLiteral("application/octet-stream")); QTest::newRow("URL to .bib file") << QUrl(QStringLiteral("https://www.example.com/references.bib")) << db.mimeTypeForName(QStringLiteral("text/x-bibtex")); QTest::newRow("Local .bib file") << QUrl(QStringLiteral("/home/user/references.bib")) << db.mimeTypeForName(QStringLiteral("text/x-bibtex")); QTest::newRow("URL to .pdf file") << QUrl(QStringLiteral("https://www.example.com/references.pdf")) << db.mimeTypeForName(QStringLiteral("application/pdf")); QTest::newRow("Local .pdf file") << QUrl(QStringLiteral("/home/user/references.pdf")) << db.mimeTypeForName(QStringLiteral("application/pdf")); } void KBibTeXIOTest::fileInfoMimeTypeForUrl() { QFETCH(QUrl, url); QFETCH(QMimeType, mimetype); QCOMPARE(FileInfo::mimeTypeForUrl(url), mimetype); } void KBibTeXIOTest::fileInfoUrlsInText_data() { QTest::addColumn("text"); QTest::addColumn>("expectedUrls"); QTest::newRow("Empty text") << QString() << QSet(); - QTest::newRow("Lore ipsum with DOI (without URL)") << QStringLiteral("Lore ipsum 10.1000/38-abc Lore ipsum") << QSet{QUrl(FileInfo::doiUrlPrefix() + QStringLiteral("10.1000/38-abc"))}; - QTest::newRow("Lore ipsum with DOI (with URL)") << QStringLiteral("Lore ipsum http://doi.example.org/10.1000/38-abc Lore ipsum") << QSet{QUrl(FileInfo::doiUrlPrefix() + QStringLiteral("10.1000/38-abc"))}; - QTest::newRow("URLs and DOI (without URL), all semicolon-separated") << QStringLiteral("http://www.example.com;10.1000/38-abc ;\nhttps://www.example.com") << QSet{QUrl(QStringLiteral("http://www.example.com")), QUrl(FileInfo::doiUrlPrefix() + QStringLiteral("10.1000/38-abc")), QUrl(QStringLiteral("https://www.example.com"))}; - QTest::newRow("URLs and DOI (with URL), all semicolon-separated") << QStringLiteral("http://www.example.com\n; 10.1000/38-abc;https://www.example.com") << QSet{QUrl(QStringLiteral("http://www.example.com")), QUrl(FileInfo::doiUrlPrefix() + QStringLiteral("10.1000/38-abc")), QUrl(QStringLiteral("https://www.example.com"))}; - QTest::newRow("URLs with various separators") << QStringLiteral("http://www.example.com/def.pdf https://www.example.com\nhttp://download.example.com/abc") << QSet{QUrl(QStringLiteral("http://www.example.com/def.pdf")), QUrl(QStringLiteral("https://www.example.com")), QUrl(QStringLiteral("http://download.example.com/abc"))}; - QTest::newRow("URLs with query strings and anchors") << QStringLiteral("http://www.example.com/def.pdf?a=3&b=1 https://www.example.com#1581584\nhttp://download.example.com/abc,7352,A#abc?gh=352&ghi=1254") << QSet{QUrl(QStringLiteral("http://www.example.com/def.pdf?a=3&b=1")), QUrl(QStringLiteral("https://www.example.com#1581584")), QUrl(QStringLiteral("http://download.example.com/abc,7352,A#abc?gh=352&ghi=1254"))}; + QTest::newRow("Lore ipsum with DOI (without URL)") << QStringLiteral("Lore ipsum 10.1000/38-abc Lore ipsum") << QSet {QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc"))}; + QTest::newRow("Lore ipsum with DOI (with URL)") << QStringLiteral("Lore ipsum http://doi.example.org/10.1000/38-abc Lore ipsum") << QSet {QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc"))}; + QTest::newRow("URLs and DOI (without URL), all semicolon-separated") << QStringLiteral("http://www.example.com;10.1000/38-abc ;\nhttps://www.example.com") << QSet {QUrl(QStringLiteral("http://www.example.com")), QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc")), QUrl(QStringLiteral("https://www.example.com"))}; + QTest::newRow("URLs and DOI (with URL), all semicolon-separated") << QStringLiteral("http://www.example.com\n; 10.1000/38-abc;https://www.example.com") << QSet {QUrl(QStringLiteral("http://www.example.com")), QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc")), QUrl(QStringLiteral("https://www.example.com"))}; + QTest::newRow("URLs with various separators") << QStringLiteral("http://www.example.com/def.pdf https://www.example.com\nhttp://download.example.com/abc") << QSet {QUrl(QStringLiteral("http://www.example.com/def.pdf")), QUrl(QStringLiteral("https://www.example.com")), QUrl(QStringLiteral("http://download.example.com/abc"))}; + QTest::newRow("URLs with query strings and anchors") << QStringLiteral("http://www.example.com/def.pdf?a=3&b=1 https://www.example.com#1581584\nhttp://download.example.com/abc,7352,A#abc?gh=352&ghi=1254") << QSet {QUrl(QStringLiteral("http://www.example.com/def.pdf?a=3&b=1")), QUrl(QStringLiteral("https://www.example.com#1581584")), QUrl(QStringLiteral("http://download.example.com/abc,7352,A#abc?gh=352&ghi=1254"))}; } void KBibTeXIOTest::fileInfoUrlsInText() { QFETCH(QString, text); QFETCH(QSet, expectedUrls); QSet extractedUrls; FileInfo::urlsInText(text, FileInfo::TestExistenceNo, QString(), extractedUrls); QCOMPARE(extractedUrls.count(), expectedUrls.count()); for (const QUrl &expectedUrl : const_cast &>(expectedUrls)) QCOMPARE(extractedUrls.contains(expectedUrl), true); } QVector > KBibTeXIOTest::fileImporterExporterTestCases() { static QVector > result; if (result.isEmpty()) { /// Empty file without any entries result.append(QPair("Empty file", new File())); /// File with single entry, inspired by 'Moby Dick' File *f1 = new File(); QSharedPointer entry1(new Entry(Entry::etArticle, QStringLiteral("the-whale-1851"))); f1->append(entry1); entry1->insert(Entry::ftTitle, Value() << QSharedPointer(new PlainText(QStringLiteral("{Call me Ishmael}")))); entry1->insert(Entry::ftAuthor, Value() << QSharedPointer<Person>(new Person(QStringLiteral("Herman"), QStringLiteral("Melville"))) << QSharedPointer<Person>(new Person(QStringLiteral("Moby"), QStringLiteral("Dick")))); entry1->insert(Entry::ftYear, Value() << QSharedPointer<PlainText>(new PlainText(QStringLiteral("1851")))); result.append(QPair<const char *, File *>("Moby Dick", f1)); // TODO add more file objects to result vector /// Set various properties to guarantee reproducible results irrespective of local settings for (auto it = result.constBegin(); it != result.constEnd(); ++it) { File *file = it->second; file->setProperty(File::NameFormatting, Preferences::personNameFormatLastFirst); file->setProperty(File::ProtectCasing, static_cast<int>(Qt::Checked)); // TODO more file properties to set? } } return result; } void KBibTeXIOTest::fileExporterXMLsave_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QString>("xmlData"); static const QHash<const char *, QString> keyToXmlData { {"Empty file", QStringLiteral("<?xml version=\"1.0\" encoding=\"UTF-8\"?>|<!-- XML document written by KBibTeXIO as part of KBibTeX -->|<!-- https://userbase.kde.org/KBibTeX -->|<bibliography>|</bibliography>|")}, {"Moby Dick", QStringLiteral("<?xml version=\"1.0\" encoding=\"UTF-8\"?>|<!-- XML document written by KBibTeXIO as part of KBibTeX -->|<!-- https://userbase.kde.org/KBibTeX -->|<bibliography>| <entry id=\"the-whale-1851\" type=\"article\">| <authors>|<person><firstname>Herman</firstname><lastname>Melville</lastname></person> <person><firstname>Moby</firstname><lastname>Dick</lastname></person>| </authors>| <title><text>Call me Ishmael</text></title>| <year><text>1851</text></year>| </entry>|</bibliography>|")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToXmlData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToXmlData.value(it->first); } void KBibTeXIOTest::fileExporterXMLsave() { QFETCH(File *, bibTeXfile); QFETCH(QString, xmlData); FileExporterXML fileExporterXML(this); QStringList errorLog; const QString generatedData = fileExporterXML.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qDebug() << logLine; QCOMPARE(generatedData, xmlData); } void KBibTeXIOTest::fileExporterRISsave_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QString>("risData"); static const QHash<const char *, QString> keyToRisData { {"Empty file", QString()}, {"Moby Dick", QStringLiteral("TY - JOUR|ID - the-whale-1851|AU - Melville, Herman|AU - Dick, Moby|TI - Call me Ishmael|PY - 1851///|ER - ||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToRisData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToRisData.value(it->first); } void KBibTeXIOTest::fileExporterRISsave() { QFETCH(File *, bibTeXfile); QFETCH(QString, risData); FileExporterRIS fileExporterRIS(this); QStringList errorLog; const QString generatedData = fileExporterRIS.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qDebug() << logLine; QCOMPARE(generatedData, risData); } void KBibTeXIOTest::fileExporterBibTeXsave_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QString>("bibTeXdata"); static const QHash<const char *, QString> keyToBibTeXData { {"Empty file", QString()}, {"Moby Dick", QStringLiteral("@article{the-whale-1851,|\tauthor = {Melville, Herman and Dick, Moby},|\ttitle = {{Call me Ishmael}},|\tyear = {1851}|}||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToBibTeXData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToBibTeXData.value(it->first); } void KBibTeXIOTest::fileExporterBibTeXsave() { QFETCH(File *, bibTeXfile); QFETCH(QString, bibTeXdata); FileExporterBibTeX fileExporterBibTeX(this); QStringList errorLog; const QString generatedData = fileExporterBibTeX.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qDebug() << logLine; QCOMPARE(generatedData, bibTeXdata); } void KBibTeXIOTest::fileImporterRISload_data() { QTest::addColumn<QByteArray>("risData"); QTest::addColumn<File *>("bibTeXfile"); static const QHash<const char *, QString> keyToRisData { {"Empty file", QString()}, {"Moby Dick", QStringLiteral("TY - JOUR|ID - the-whale-1851|AU - Melville, Herman|AU - Dick, Moby|TI - Call me Ishmael|PY - 1851///|ER - ||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToRisData.contains(it->first)) QTest::newRow(it->first) << keyToRisData.value(it->first).toUtf8().replace('|', '\n') << it->second; } void KBibTeXIOTest::fileImporterRISload() { QFETCH(QByteArray, risData); QFETCH(File *, bibTeXfile); FileImporterRIS fileImporterRIS(this); fileImporterRIS.setProtectCasing(true); QBuffer buffer(&risData); buffer.open(QBuffer::ReadOnly); QScopedPointer<File> generatedFile(fileImporterRIS.load(&buffer)); QVERIFY(generatedFile->operator ==(*bibTeXfile)); } void KBibTeXIOTest::fileImporterBibTeXload_data() { QTest::addColumn<QByteArray>("bibTeXdata"); QTest::addColumn<File *>("bibTeXfile"); static const QHash<const char *, QString> keyToBibTeXData { {"Empty file", QString()}, {"Moby Dick", QStringLiteral("@article{the-whale-1851,|\tauthor = {Melville, Herman and Dick, Moby},|\ttitle = {{Call me Ishmael}},|\tyear = {1851}|}||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToBibTeXData.contains(it->first)) QTest::newRow(it->first) << keyToBibTeXData.value(it->first).toUtf8().replace('|', '\n') << it->second ; } void KBibTeXIOTest::fileImporterBibTeXload() { QFETCH(QByteArray, bibTeXdata); QFETCH(File *, bibTeXfile); FileImporterBibTeX fileImporterBibTeX(this); QBuffer buffer(&bibTeXdata); buffer.open(QBuffer::ReadOnly); QScopedPointer<File> generatedFile(fileImporterBibTeX.load(&buffer)); QVERIFY(generatedFile->operator ==(*bibTeXfile)); } void KBibTeXIOTest::protectiveCasingEntryGeneratedOnTheFly() { static const QString titleText = QStringLiteral("Some Title for a Journal Article"); static const QString singleCurleyBracketTitle = QStringLiteral("{") + titleText + QStringLiteral("}"); static const QString doubleCurleyBracketTitle = QStringLiteral("{{") + titleText + QStringLiteral("}}"); FileExporterBibTeX fileExporterBibTeX(this); /// Create a simple File object with a title field File file; file.setProperty(File::StringDelimiter, QStringLiteral("{}")); QSharedPointer<Entry> entry {new Entry(Entry::etArticle, QStringLiteral("SomeId"))}; Value titleValue = Value() << QSharedPointer<PlainText>(new PlainText(titleText)); entry->insert(Entry::ftTitle, titleValue); file.append(entry); file.setProperty(File::ProtectCasing, Qt::Checked); const QString textWithProtectiveCasing = fileExporterBibTeX.toString(&file); QVERIFY(textWithProtectiveCasing.contains(doubleCurleyBracketTitle)); file.setProperty(File::ProtectCasing, Qt::Unchecked); const QString textWithoutProtectiveCasing = fileExporterBibTeX.toString(&file); QVERIFY(textWithoutProtectiveCasing.contains(singleCurleyBracketTitle) && !textWithoutProtectiveCasing.contains(doubleCurleyBracketTitle)); } void KBibTeXIOTest::protectiveCasingEntryFromData() { static const QString titleText = QStringLiteral("Some Title for a Journal Article"); static const QString singleCurleyBracketTitle = QStringLiteral("{") + titleText + QStringLiteral("}"); static const QString doubleCurleyBracketTitle = QStringLiteral("{{") + titleText + QStringLiteral("}}"); static const QString bibTeXDataDoubleCurleyBracketTitle = QStringLiteral("@articl{doubleCurleyBracketTitle,\ntitle={{") + titleText + QStringLiteral("}}\n}\n"); static const QString bibTeXDataSingleCurleyBracketTitle = QStringLiteral("@articl{singleCurleyBracketTitle,\ntitle={") + titleText + QStringLiteral("}\n}\n"); FileImporterBibTeX fileImporterBibTeX(this); FileExporterBibTeX fileExporterBibTeX(this); QByteArray b1(bibTeXDataDoubleCurleyBracketTitle.toUtf8()); QBuffer bufferDoubleCurleyBracketTitle(&b1, this); QByteArray b2(bibTeXDataSingleCurleyBracketTitle.toUtf8()); QBuffer bufferSingleCurleyBracketTitle(&b2, this); bufferDoubleCurleyBracketTitle.open(QBuffer::ReadOnly); QScopedPointer<File> fileDoubleCurleyBracketTitle(fileImporterBibTeX.load(&bufferDoubleCurleyBracketTitle)); bufferDoubleCurleyBracketTitle.close(); fileDoubleCurleyBracketTitle->setProperty(File::StringDelimiter, QStringLiteral("{}")); bufferSingleCurleyBracketTitle.open(QBuffer::ReadOnly); QScopedPointer<File> fileSingleCurleyBracketTitle(fileImporterBibTeX.load(&bufferSingleCurleyBracketTitle)); bufferSingleCurleyBracketTitle.close(); fileSingleCurleyBracketTitle->setProperty(File::StringDelimiter, QStringLiteral("{}")); fileDoubleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::PartiallyChecked); const QString textDoubleCurleyBracketTitlePartialProtectiveCasing = fileExporterBibTeX.toString(fileDoubleCurleyBracketTitle.data()); QVERIFY(textDoubleCurleyBracketTitlePartialProtectiveCasing.contains(doubleCurleyBracketTitle)); fileSingleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::PartiallyChecked); const QString textSingleCurleyBracketTitlePartialProtectiveCasing = fileExporterBibTeX.toString(fileSingleCurleyBracketTitle.data()); QVERIFY(textSingleCurleyBracketTitlePartialProtectiveCasing.contains(singleCurleyBracketTitle) && !textSingleCurleyBracketTitlePartialProtectiveCasing.contains(doubleCurleyBracketTitle)); fileDoubleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Checked); const QString textDoubleCurleyBracketTitleWithProtectiveCasing = fileExporterBibTeX.toString(fileDoubleCurleyBracketTitle.data()); QVERIFY(textDoubleCurleyBracketTitleWithProtectiveCasing.contains(doubleCurleyBracketTitle)); fileSingleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Checked); const QString textSingleCurleyBracketTitleWithProtectiveCasing = fileExporterBibTeX.toString(fileSingleCurleyBracketTitle.data()); QVERIFY(textSingleCurleyBracketTitleWithProtectiveCasing.contains(doubleCurleyBracketTitle)); fileDoubleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Unchecked); const QString textDoubleCurleyBracketTitleWithoutProtectiveCasing = fileExporterBibTeX.toString(fileDoubleCurleyBracketTitle.data()); QVERIFY(textDoubleCurleyBracketTitleWithoutProtectiveCasing.contains(singleCurleyBracketTitle) && !textDoubleCurleyBracketTitleWithoutProtectiveCasing.contains(doubleCurleyBracketTitle)); fileSingleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Unchecked); const QString textSingleCurleyBracketTitleWithoutProtectiveCasing = fileExporterBibTeX.toString(fileSingleCurleyBracketTitle.data()); QVERIFY(textSingleCurleyBracketTitleWithoutProtectiveCasing.contains(singleCurleyBracketTitle) && !textSingleCurleyBracketTitleWithoutProtectiveCasing.contains(doubleCurleyBracketTitle)); } void KBibTeXIOTest::partialBibTeXInput_data() { QTest::addColumn<bool>("isValid"); QTest::addColumn<QString>("text"); static const struct BibTeXDataTable { const char *label; const bool isValid; const QString text; } bibTeXDataTable[] = { {"Empty string", false, QString()}, {"Only 'at' sign", false, QStringLiteral("@")}, {"Only 'at' sign followed by element type", false, QStringLiteral("@entry")}, {"Only up to opening curly bracket", false, QStringLiteral("@entry{")}, {"Complete entry but without id", true, QStringLiteral("@entry{,\n title=\"{Abc Def}\",\n month = jan\n}")}, {"Entry without any data", true, QStringLiteral("@entry{}")}, {"Entry up to entry id, but no closing curly bracket", false, QStringLiteral("@entry{test")}, {"Entry up to entry id with opening curly bracket", false, QStringLiteral("@entry{test{")}, {"Entry up to entry id with closing curly bracket", true, QStringLiteral("@entry{test}")}, {"Entry up to comma after entry id", false, QStringLiteral("@entry{test,")}, {"Entry up to comma after entry id, followed by closing curly bracket", true, QStringLiteral("@entry{test,}")}, {"Entry up to first field's key, but nothing more, not even an assign char", false, QStringLiteral("@entry{test,title")}, {"Entry up to first field's key, but nothing more, just a closing curly bracket", false, QStringLiteral("@entry{test,title}")}, {"Entry up to first field's assign char, but nothing more", false, QStringLiteral("@entry{test,title=")}, {"Entry up to first field's assign char, but nothing more, just a closing curly bracket", false, QStringLiteral("@entry{test,title=}")}, {"Invalid combination of curly bracket in a field's value (1)", false, QStringLiteral("@entry{test,title={}")}, {"Invalid combination of curly bracket in a field's value (2)", false, QStringLiteral("@entry{test,title={{}}")}, {"Invalid combination of curly bracket in a field's value (3)", false, QStringLiteral("@entry{test,title={}{}")}, {"Invalid combination of curly bracket in a field's value (4)", false, QStringLiteral("@entry{test,title={}{}}")}, {"Complete entry with empty title (1)", true, QStringLiteral("@entry{test,\n title=\"{}\"\n}")}, {"Complete entry with empty title (2)", true, QStringLiteral("@entry{test,\n title=\"\"\n}")}, {"Complete entry with empty title (3)", true, QStringLiteral("@entry{test,\n title={{}}\n}")}, {"Entry abruptly ending at macro key as field value (1)", false, QStringLiteral("@entry{test,\n month = jan")}, {"Entry abruptly ending at macro key as field value (2)", false, QStringLiteral("@entry{test,\n month = jan\n")}, // TODO more tests {"Complete entry", true, QStringLiteral("@entry{test,\n title=\"{Abc Def}\",\n month = jan\n}")} }; for (const auto &bibTeXDataRow : bibTeXDataTable) QTest::newRow(bibTeXDataRow.label) << bibTeXDataRow.isValid << bibTeXDataRow.text; } void KBibTeXIOTest::partialBibTeXInput() { QFETCH(bool, isValid); QFETCH(QString, text); bool gotErrors = false; FileImporterBibTeX importer(this); connect(&importer, &FileImporter::message, [&gotErrors](const FileImporter::MessageSeverity messageSeverity, const QString &messageText) { gotErrors |= messageSeverity >= FileImporter::SeverityError; Q_UNUSED(messageText); //qDebug()<<"FileImporterBibTeX issues message during 'partialBibTeXInput' test: "<<messageText; }); QScopedPointer<File> bibTeXfile(importer.fromString(text)); QVERIFY(text.isEmpty() || isValid != gotErrors); QVERIFY(isValid ? (!bibTeXfile.isNull() && bibTeXfile->count() == 1) : (bibTeXfile.isNull() || bibTeXfile->count() == 0)); } void KBibTeXIOTest::partialRISInput_data() { QTest::addColumn<bool>("isValid"); QTest::addColumn<QString>("text"); static const struct RISDataTable { const char *label; const bool isValid; const QString text; } risDataTable[] = { //{"Empty string", false, QString()}, {"Incorrect year", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 5555/07//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27\nER -")}, {"Incorrect month", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 1948/17//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27\nER -")}, {"Entry does not end with 'ER'", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 1948/07//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27")}, // TODO more tests //{"Complete entry", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 1948/07//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27\nER -")} }; for (const auto &risDataRow : risDataTable) QTest::newRow(risDataRow.label) << risDataRow.isValid << risDataRow.text; } void KBibTeXIOTest::partialRISInput() { QFETCH(bool, isValid); QFETCH(QString, text); bool gotErrors = false; FileImporterRIS importer(this); connect(&importer, &FileImporter::message, [&gotErrors](const FileImporter::MessageSeverity messageSeverity, const QString &messageText) { gotErrors |= messageSeverity >= FileImporter::SeverityError; Q_UNUSED(messageText); //qDebug()<<"FileImporterRIS issues message during 'partialBibTeXInput' test: "<<messageText; }); QScopedPointer<File> bibTeXfile(importer.fromString(text)); QVERIFY(text.isEmpty() || isValid != gotErrors); QVERIFY(isValid ? (!bibTeXfile.isNull() && bibTeXfile->count() == 1) : (bibTeXfile.isNull() || bibTeXfile->count() == 0)); } void KBibTeXIOTest::initTestCase() { QFile texFile(QStandardPaths::writableLocation(QStandardPaths::TempLocation) + QStringLiteral("/encoderlatex-tables.tex")); qDebug() << "Writing LaTeX tables to: " << texFile.fileName(); if (texFile.open(QFile::WriteOnly)) { EncoderLaTeX::writeLaTeXTables(texFile); texFile.close(); } qRegisterMetaType<FileImporter::MessageSeverity>(); } QTEST_MAIN(KBibTeXIOTest) #include "kbibtexiotest.moc"