diff --git a/src/fetch/allocinefetcher.cpp b/src/fetch/allocinefetcher.cpp index 4ddc3047..a8379de7 100644 --- a/src/fetch/allocinefetcher.cpp +++ b/src/fetch/allocinefetcher.cpp @@ -1,503 +1,507 @@ /*************************************************************************** Copyright (C) 2012-2013 Robby Stephenson ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or * * modify it under the terms of the GNU General Public License as * * published by the Free Software Foundation; either version 2 of * * the License or (at your option) version 3 or any later version * * accepted by the membership of KDE e.V. (or its successor approved * * by the membership of KDE e.V.), which shall act as a proxy * * defined in Section 14 of version 3 of the license. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see . * * * ***************************************************************************/ #include // for TELLICO_VERSION #include "allocinefetcher.h" #include "../collections/videocollection.h" #include "../images/imagefactory.h" #include "../entry.h" #include "../utils/guiproxy.h" #include "../utils/string_utils.h" #include "../core/filehandler.h" #include "../tellico_debug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace { static const char* ALLOCINE_API_KEY = "100043982026"; static const char* ALLOCINE_API_URL = "http://api.allocine.fr/rest/v3/"; static const char* ALLOCINE_PARTNER_KEY = "29d185d98c984a359e6e6f26a0474269"; } using namespace Tellico; using Tellico::Fetch::AbstractAllocineFetcher; using Tellico::Fetch::AllocineFetcher; AbstractAllocineFetcher::AbstractAllocineFetcher(QObject* parent_, const QString& baseUrl_) : Fetcher(parent_) , m_started(false) , m_apiKey(QLatin1String(ALLOCINE_API_KEY)) , m_baseUrl(baseUrl_) , m_numCast(10) { Q_ASSERT(!m_baseUrl.isEmpty()); } AbstractAllocineFetcher::~AbstractAllocineFetcher() { } bool AbstractAllocineFetcher::canSearch(FetchKey k) const { return k == Keyword; } bool AbstractAllocineFetcher::canFetch(int type) const { return type == Data::Collection::Video; } void AbstractAllocineFetcher::readConfigHook(const KConfigGroup& config_) { QString k = config_.readEntry("API Key", ALLOCINE_API_KEY); if(!k.isEmpty()) { m_apiKey = k; } m_numCast = config_.readEntry("Max Cast", 10); } void AbstractAllocineFetcher::search() { m_started = true; QUrl u(m_baseUrl); u = u.adjusted(QUrl::StripTrailingSlash); u.setPath(u.path() + QLatin1Char('/') + QStringLiteral("search")); // myDebug() << u; // the order of the parameters appears to matter QList > params; params.append(qMakePair(QStringLiteral("partner"), m_apiKey)); // I can't figure out how to encode accent marks, but they don't // seem to be necessary QString q = removeAccents(request().value); // should I just remove all non alphabetical characters? // see https://bugs.kde.org/show_bug.cgi?id=337432 q.remove(QRegExp(QStringLiteral("[,:!?;\\(\\)]"))); q.replace(QLatin1Char('\''), QLatin1Char('+')); q.replace(QLatin1Char(' '), QLatin1Char('+')); switch(request().key) { case Keyword: params.append(qMakePair(QStringLiteral("q"), q)); break; default: myWarning() << "key not recognized: " << request().key; return; } params.append(qMakePair(QStringLiteral("format"), QStringLiteral("json"))); params.append(qMakePair(QStringLiteral("filter"), QStringLiteral("movie"))); const QString sed = QDateTime::currentDateTimeUtc().toString(QStringLiteral("yyyyMMdd")); params.append(qMakePair(QStringLiteral("sed"), sed)); const QByteArray sig = calculateSignature(params); QUrlQuery query; query.setQueryItems(params); query.addQueryItem(QStringLiteral("sig"), QLatin1String(sig)); u.setQuery(query); // myDebug() << u; m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); // 10/8/17: UserAgent appears necessary to receive data m_job->addMetaData(QStringLiteral("UserAgent"), QStringLiteral("Tellico/%1") .arg(QStringLiteral(TELLICO_VERSION))); KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); connect(m_job.data(), &KJob::result, this, &AbstractAllocineFetcher::slotComplete); } void AbstractAllocineFetcher::stop() { if(!m_started) { return; } if(m_job) { m_job->kill(); } m_started = false; emit signalDone(this); } Tellico::Data::EntryPtr AbstractAllocineFetcher::fetchEntryHook(uint uid_) { Data::EntryPtr entry = m_entries.value(uid_); if(!entry) { myWarning() << "no entry in dict"; return Data::EntryPtr(); } QString code = entry->field(QStringLiteral("allocine-code")); if(code.isEmpty()) { // could mean we already updated the entry myDebug() << "no allocine release found"; return entry; } QUrl u(m_baseUrl); u = u.adjusted(QUrl::StripTrailingSlash); u.setPath(u.path() + QLatin1Char('/') + QStringLiteral("movie")); // the order of the parameters appears to matter QList > params; params.append(qMakePair(QStringLiteral("partner"), m_apiKey)); params.append(qMakePair(QStringLiteral("code"), code)); params.append(qMakePair(QStringLiteral("profile"), QStringLiteral("large"))); params.append(qMakePair(QStringLiteral("filter"), QStringLiteral("movie"))); params.append(qMakePair(QStringLiteral("format"), QStringLiteral("json"))); const QString sed = QDateTime::currentDateTimeUtc().toString(QStringLiteral("yyyyMMdd")); params.append(qMakePair(QStringLiteral("sed"), sed)); const QByteArray sig = calculateSignature(params); QUrlQuery query; query.setQueryItems(params); query.addQueryItem(QStringLiteral("sig"), QLatin1String(sig)); u.setQuery(query); // myDebug() << "url: " << u; // 10/8/17: UserAgent appears necessary to receive data // QByteArray data = FileHandler::readDataFile(u, true); KIO::StoredTransferJob* dataJob = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo); dataJob->addMetaData(QStringLiteral("UserAgent"), QStringLiteral("Tellico/%1") .arg(QStringLiteral(TELLICO_VERSION))); if(!dataJob->exec()) { myDebug() << "Failed to load" << u; return entry; } const QByteArray data = dataJob->data(); #if 0 myWarning() << "Remove debug2 from allocinefetcher.cpp"; QFile f(QString::fromLatin1("/tmp/test2.json")); if(f.open(QIODevice::WriteOnly)) { QTextStream t(&f); t.setCodec("UTF-8"); t << data; } f.close(); #endif QJsonParseError error; QJsonDocument doc = QJsonDocument::fromJson(data, &error); QVariantMap result = doc.object().toVariantMap().value(QStringLiteral("movie")).toMap(); if(error.error != QJsonParseError::NoError) { myDebug() << "Bad JSON results"; #if 0 myWarning() << "Remove debug3 from allocinefetcher.cpp"; QFile f2(QString::fromLatin1("/tmp/test3.json")); if(f2.open(QIODevice::WriteOnly)) { QTextStream t(&f2); t.setCodec("UTF-8"); t << data; } f2.close(); #endif return entry; } populateEntry(entry, result); // image might still be a URL const QString image_id = entry->field(QStringLiteral("cover")); if(image_id.contains(QLatin1Char('/'))) { const QString id = ImageFactory::addImage(QUrl::fromUserInput(image_id), true /* quiet */); if(id.isEmpty()) { message(i18n("The cover image could not be loaded."), MessageHandler::Warning); } // empty image ID is ok entry->setField(QStringLiteral("cover"), id); } // don't want to include id entry->collection()->removeField(QStringLiteral("allocine-code")); QStringList castRows = FieldFormat::splitTable(entry->field(QStringLiteral("cast"))); while(castRows.count() > m_numCast) { castRows.removeLast(); } entry->setField(QStringLiteral("cast"), castRows.join(FieldFormat::rowDelimiterString())); return entry; } void AbstractAllocineFetcher::slotComplete(KJob*) { if(m_job->error()) { myDebug() << "Error:" << m_job->errorString(); m_job->uiDelegate()->showErrorMessage(); stop(); return; } QByteArray data = m_job->data(); if(data.isEmpty()) { myDebug() << "no data"; stop(); return; } // see bug 319662. If fetcher is cancelled, job is killed // if the pointer is retained, it gets double-deleted m_job = nullptr; #if 0 myWarning() << "Remove debug from allocinefetcher.cpp"; QFile f(QString::fromLatin1("/tmp/test.json")); if(f.open(QIODevice::WriteOnly)) { QTextStream t(&f); t.setCodec("UTF-8"); t << data; } f.close(); #endif QJsonDocument doc = QJsonDocument::fromJson(data); QVariantMap result = doc.object().toVariantMap().value(QStringLiteral("feed")).toMap(); // myDebug() << "total:" << result.value(QLatin1String("totalResults")); QVariantList resultList = result.value(QStringLiteral("movie")).toList(); if(resultList.isEmpty()) { myDebug() << "no results"; stop(); return; } foreach(const QVariant& result, resultList) { // myDebug() << "found result:" << result; //create a new collection for every result since we end up removing the allocine code field // when fetchEntryHook is called. See bug 338389 Data::EntryPtr entry(new Data::Entry(createCollection())); populateEntry(entry, result.toMap()); FetchResult* r = new FetchResult(Fetcher::Ptr(this), entry); m_entries.insert(r->uid, entry); emit signalResultFound(r); } m_hasMoreResults = false; stop(); } Tellico::Data::CollPtr AbstractAllocineFetcher::createCollection() const { Data::CollPtr coll(new Data::VideoCollection(true)); // always add the allocine release code for fetchEntryHook Data::FieldPtr field(new Data::Field(QStringLiteral("allocine-code"), QStringLiteral("Allocine Code"), Data::Field::Number)); field->setCategory(i18n("General")); coll->addField(field); // add new fields if(optionalFields().contains(QStringLiteral("allocine"))) { Data::FieldPtr field(new Data::Field(QStringLiteral("allocine"), i18n("Allocine Link"), Data::Field::URL)); field->setCategory(i18n("General")); coll->addField(field); } if(optionalFields().contains(QStringLiteral("origtitle"))) { Data::FieldPtr f(new Data::Field(QStringLiteral("origtitle"), i18n("Original Title"))); f->setFormatType(FieldFormat::FormatTitle); coll->addField(f); } return coll; } void AbstractAllocineFetcher::populateEntry(Data::EntryPtr entry, const QVariantMap& resultMap) { if(entry->collection()->hasField(QStringLiteral("allocine-code"))) { entry->setField(QStringLiteral("allocine-code"), mapValue(resultMap, "code")); } entry->setField(QStringLiteral("title"), mapValue(resultMap, "title")); if(optionalFields().contains(QStringLiteral("origtitle"))) { entry->setField(QStringLiteral("origtitle"), mapValue(resultMap, "originalTitle")); } if(entry->title().isEmpty()) { entry->setField(QStringLiteral("title"), mapValue(resultMap, "originalTitle")); } entry->setField(QStringLiteral("year"), mapValue(resultMap, "productionYear")); entry->setField(QStringLiteral("plot"), mapValue(resultMap, "synopsis")); const int runTime = mapValue(resultMap, "runtime").toInt(); entry->setField(QStringLiteral("running-time"), QString::number(runTime/60)); const QVariantList castList = resultMap.value(QStringLiteral("castMember")).toList(); QStringList actors, directors, producers, composers; foreach(const QVariant& castVariant, castList) { const QVariantMap castMap = castVariant.toMap(); const int code = mapValue(castMap, "activity", "code").toInt(); switch(code) { case 8001: actors << (mapValue(castMap, "person", "name") + FieldFormat::columnDelimiterString() + mapValue(castMap, "role")); break; case 8002: directors << mapValue(castMap, "person", "name"); break; case 8029: producers << mapValue(castMap, "person", "name"); break; case 8003: composers << mapValue(castMap, "person", "name"); break; } } entry->setField(QStringLiteral("cast"), actors.join(FieldFormat::rowDelimiterString())); entry->setField(QStringLiteral("director"), directors.join(FieldFormat::delimiterString())); entry->setField(QStringLiteral("producer"), producers.join(FieldFormat::delimiterString())); entry->setField(QStringLiteral("composer"), composers.join(FieldFormat::delimiterString())); const QVariantMap releaseMap = resultMap.value(QStringLiteral("release")).toMap(); entry->setField(QStringLiteral("studio"), mapValue(releaseMap, "distributor", "name")); QStringList genres; foreach(const QVariant& variant, resultMap.value(QLatin1String("genre")).toList()) { genres << i18n(mapValue(variant.toMap(), "$").toUtf8().constData()); } entry->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); QStringList nats; foreach(const QVariant& variant, resultMap.value(QLatin1String("nationality")).toList()) { nats << mapValue(variant.toMap(), "$"); } entry->setField(QStringLiteral("nationality"), nats.join(FieldFormat::delimiterString())); QStringList langs; foreach(const QVariant& variant, resultMap.value(QLatin1String("language")).toList()) { langs << mapValue(variant.toMap(), "$"); } entry->setField(QStringLiteral("language"), langs.join(FieldFormat::delimiterString())); const QVariantMap colorMap = resultMap.value(QLatin1String("color")).toMap(); if(colorMap.value(QStringLiteral("code")) == QLatin1String("12001")) { entry->setField(QStringLiteral("color"), i18n("Color")); } entry->setField(QStringLiteral("cover"), mapValue(resultMap, "poster", "href")); if(optionalFields().contains(QStringLiteral("allocine"))) { entry->setField(QStringLiteral("allocine"), mapValue(resultMap, "link", "href")); } } Tellico::Fetch::FetchRequest AbstractAllocineFetcher::updateRequest(Data::EntryPtr entry_) { QString title = entry_->field(QStringLiteral("title")); if(!title.isEmpty()) { return FetchRequest(Keyword, title); } return FetchRequest(); } AbstractAllocineFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AbstractAllocineFetcher* fetcher_) : Fetch::ConfigWidget(parent_) { QGridLayout* l = new QGridLayout(optionsWidget()); l->setSpacing(4); l->setColumnStretch(1, 10); int row = -1; QLabel* label = new QLabel(i18n("&Maximum cast: "), optionsWidget()); l->addWidget(label, ++row, 0); m_numCast = new QSpinBox(optionsWidget()); m_numCast->setMaximum(99); m_numCast->setMinimum(0); m_numCast->setValue(10); - void (QSpinBox::* valueChanged)(const QString&) = &QSpinBox::valueChanged; - connect(m_numCast, valueChanged, this, &ConfigWidget::slotSetModified); +#if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) + void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::valueChanged; +#else + void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::textChanged; +#endif + connect(m_numCast, textChanged, this, &ConfigWidget::slotSetModified); l->addWidget(m_numCast, row, 1); QString w = i18n("The list of cast members may include many people. Set the maximum number returned from the search."); label->setWhatsThis(w); m_numCast->setWhatsThis(w); label->setBuddy(m_numCast); l->setRowStretch(++row, 10); m_numCast->setValue(fetcher_ ? fetcher_->m_numCast : 10); } void AbstractAllocineFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) { config_.writeEntry("Max Cast", m_numCast->value()); } QByteArray AbstractAllocineFetcher::calculateSignature(const QList >& params_) { typedef QPair StringPair; QByteArray queryString; foreach(const StringPair& pair, params_) { queryString.append(pair.first.toUtf8().toPercentEncoding("+")); queryString.append('='); queryString.append(pair.second.toUtf8().toPercentEncoding("+")); queryString.append('&'); } // remove final '&' queryString.chop(1); const QByteArray toSign = ALLOCINE_PARTNER_KEY + queryString; const QByteArray hash = QCryptographicHash::hash(toSign, QCryptographicHash::Sha1); QByteArray sig = hash.toBase64(); return sig; } /**********************************************************************************************/ AllocineFetcher::AllocineFetcher(QObject* parent_) : AbstractAllocineFetcher(parent_, QLatin1String(ALLOCINE_API_URL)) { } QString AllocineFetcher::source() const { return m_name.isEmpty() ? defaultName() : m_name; } Tellico::Fetch::ConfigWidget* AllocineFetcher::configWidget(QWidget* parent_) const { return new AllocineFetcher::ConfigWidget(parent_, this); } QString AllocineFetcher::defaultName() { return QStringLiteral("AlloCiné.fr"); } QString AllocineFetcher::defaultIcon() { return favIcon("http://www.allocine.fr"); } Tellico::StringHash AllocineFetcher::allOptionalFields() { StringHash hash; hash[QStringLiteral("origtitle")] = i18n("Original Title"); hash[QStringLiteral("allocine")] = i18n("Allocine Link"); return hash; } AllocineFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AbstractAllocineFetcher* fetcher_) : AbstractAllocineFetcher::ConfigWidget(parent_, fetcher_) { // now add additional fields widget addFieldsWidget(AllocineFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); } QString AllocineFetcher::ConfigWidget::preferredName() const { return AllocineFetcher::defaultName(); } diff --git a/src/fetch/amazonfetcher.cpp b/src/fetch/amazonfetcher.cpp index 4037c425..8e15694e 100644 --- a/src/fetch/amazonfetcher.cpp +++ b/src/fetch/amazonfetcher.cpp @@ -1,1205 +1,1208 @@ /*************************************************************************** Copyright (C) 2004-2020 Robby Stephenson ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or * * modify it under the terms of the GNU General Public License as * * published by the Free Software Foundation; either version 2 of * * the License or (at your option) version 3 or any later version * * accepted by the membership of KDE e.V. (or its successor approved * * by the membership of KDE e.V.), which shall act as a proxy * * defined in Section 14 of version 3 of the license. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see . * * * ***************************************************************************/ #include #include "amazonfetcher.h" #include "amazonrequest.h" #include "../collectionfactory.h" #include "../images/imagefactory.h" #include "../utils/guiproxy.h" #include "../collection.h" #include "../entry.h" #include "../field.h" #include "../fieldformat.h" #include "../utils/string_utils.h" #include "../utils/isbnvalidator.h" #include "../gui/combobox.h" #include "../tellico_debug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace { static const int AMAZON_RETURNS_PER_REQUEST = 10; static const int AMAZON_MAX_RETURNS_TOTAL = 20; static const char* AMAZON_ASSOC_TOKEN = "tellico-20"; } using namespace Tellico; using Tellico::Fetch::AmazonFetcher; // static // see https://webservices.amazon.com/paapi5/documentation/common-request-parameters.html#host-and-region const AmazonFetcher::SiteData& AmazonFetcher::siteData(int site_) { Q_ASSERT(site_ >= 0); Q_ASSERT(site_ < XX); static SiteData dataVector[16] = { { i18n("Amazon (US)"), "www.amazon.com", "us-east-1", QLatin1String("us"), i18n("United States") }, { i18n("Amazon (UK)"), "www.amazon.co.uk", "eu-west-1", QLatin1String("gb"), i18n("United Kingdom") }, { i18n("Amazon (Germany)"), "www.amazon.de", "eu-west-1", QLatin1String("de"), i18n("Germany") }, { i18n("Amazon (Japan)"), "www.amazon.co.jp", "us-west-2", QLatin1String("jp"), i18n("Japan") }, { i18n("Amazon (France)"), "www.amazon.fr", "eu-west-1", QLatin1String("fr"), i18n("France") }, { i18n("Amazon (Canada)"), "www.amazon.ca", "us-east-1", QLatin1String("ca"), i18n("Canada") }, { // TODO: no chinese in PAAPI-5 yet? i18n("Amazon (China)"), "www.amazon.cn", "us-west-2", QLatin1String("ch"), i18n("China") }, { i18n("Amazon (Spain)"), "www.amazon.es", "eu-west-1", QLatin1String("es"), i18n("Spain") }, { i18n("Amazon (Italy)"), "www.amazon.it", "eu-west-1", QLatin1String("it"), i18n("Italy") }, { i18n("Amazon (Brazil)"), "www.amazon.com.br", "us-east-1", QLatin1String("br"), i18n("Brazil") }, { i18n("Amazon (Australia)"), "www.amazon.com.au", "us-west-2", QLatin1String("au"), i18n("Australia") }, { i18n("Amazon (India)"), "www.amazon.in", "eu-west-1", QLatin1String("in"), i18n("India") }, { i18n("Amazon (Mexico)"), "www.amazon.com.mx", "us-east-1", QLatin1String("mx"), i18n("Mexico") }, { i18n("Amazon (Turkey)"), "www.amazon.com.tr", "eu-west-1", QLatin1String("tr"), i18n("Turkey") }, { i18n("Amazon (Singapore)"), "www.amazon.sg", "us-west-2", QLatin1String("sg"), i18n("Singapore") }, { i18n("Amazon (UAE)"), "www.amazon.ae", "eu-west-1", QLatin1String("ae"), i18n("United Arab Emirates") } }; return dataVector[qBound(0, site_, static_cast(sizeof(dataVector)/sizeof(SiteData)))]; } AmazonFetcher::AmazonFetcher(QObject* parent_) : Fetcher(parent_), m_site(Unknown), m_imageSize(MediumImage), m_assoc(QLatin1String(AMAZON_ASSOC_TOKEN)), m_limit(AMAZON_MAX_RETURNS_TOTAL), m_countOffset(0), m_page(1), m_total(-1), m_numResults(0), m_job(nullptr), m_started(false) { // to facilitate transition to Amazon PAAPI5, allow users to enable logging the Amazon // results so they can be shared for debugging const QByteArray enableLog = qgetenv("TELLICO_ENABLE_AMAZON_LOG").trimmed().toLower(); m_enableLog = (enableLog == "true" || enableLog == "1"); } AmazonFetcher::~AmazonFetcher() { } QString AmazonFetcher::source() const { return m_name.isEmpty() ? defaultName() : m_name; } QString AmazonFetcher::attribution() const { return i18n("This data is licensed under specific terms.", QLatin1String("https://affiliate-program.amazon.com/gp/advertising/api/detail/agreement.html")); } bool AmazonFetcher::canFetch(int type) const { return type == Data::Collection::Book || type == Data::Collection::ComicBook || type == Data::Collection::Bibtex || type == Data::Collection::Album || type == Data::Collection::Video || type == Data::Collection::Game || type == Data::Collection::BoardGame; } bool AmazonFetcher::canSearch(FetchKey k) const { // no UPC in Canada return k == Title || k == Person || k == ISBN || k == UPC || k == Keyword; } void AmazonFetcher::readConfigHook(const KConfigGroup& config_) { const int site = config_.readEntry("Site", int(Unknown)); Q_ASSERT(site != Unknown); m_site = static_cast(site); if(m_name.isEmpty()) { m_name = siteData(m_site).title; } QString s = config_.readEntry("AccessKey"); if(!s.isEmpty()) { m_accessKey = s; } else { myWarning() << "No Amazon access key"; } s = config_.readEntry("AssocToken"); if(!s.isEmpty()) { m_assoc = s; } s = config_.readEntry("SecretKey"); if(!s.isEmpty()) { m_secretKey = s; } else { myWarning() << "No Amazon secret key"; } int imageSize = config_.readEntry("Image Size", -1); if(imageSize > -1) { m_imageSize = static_cast(imageSize); } } void AmazonFetcher::search() { m_started = true; m_page = 1; m_total = -1; m_countOffset = 0; m_numResults = 0; doSearch(); } void AmazonFetcher::continueSearch() { m_started = true; m_limit += AMAZON_MAX_RETURNS_TOTAL; doSearch(); } void AmazonFetcher::doSearch() { if(m_secretKey.isEmpty() || m_accessKey.isEmpty()) { // this message is split in two since the first half is reused later message(i18n("Access to data from Amazon.com requires an AWS Access Key ID and a Secret Key.") + QLatin1Char(' ') + i18n("Those values must be entered in the data source settings."), MessageHandler::Error); stop(); return; } const QByteArray payload = requestPayload(request()); if(payload.isEmpty()) { stop(); return; } AmazonRequest request(m_accessKey, m_secretKey); request.setHost(siteData(m_site).host); request.setRegion(siteData(m_site).region); // debugging check if(m_testResultsFile.isEmpty()) { QUrl u; u.setHost(QString::fromUtf8(siteData(m_site).host)); m_job = KIO::storedHttpPost(payload, u, KIO::HideProgressInfo); QMapIterator i(request.headers(payload)); while(i.hasNext()) { i.next(); m_job->addMetaData(QStringLiteral("customHTTPHeader"), QString::fromUtf8(i.key() + ": " + i.value())); } } else { myDebug() << "Reading" << m_testResultsFile; m_job = KIO::storedGet(QUrl::fromLocalFile(m_testResultsFile), KIO::NoReload, KIO::HideProgressInfo); } KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); connect(m_job.data(), &KJob::result, this, &AmazonFetcher::slotComplete); } void AmazonFetcher::stop() { if(!m_started) { return; } // myDebug(); if(m_job) { m_job->kill(); m_job = nullptr; } m_started = false; emit signalDone(this); } void AmazonFetcher::slotComplete(KJob*) { if(m_job->error()) { m_job->uiDelegate()->showErrorMessage(); stop(); return; } const QByteArray data = m_job->data(); if(data.isEmpty()) { myDebug() << "no data"; stop(); return; } // since the fetch is done, don't worry about holding the job pointer m_job = nullptr; if(m_enableLog) { QTemporaryFile logFile(QDir::tempPath() + QStringLiteral("/amazon-search-items-XXXXXX.json")); logFile.setAutoRemove(false); if(logFile.open()) { QTextStream t(&logFile); t.setCodec("UTF-8"); t << data; myLog() << "Writing Amazon data output to" << logFile.fileName(); } } #if 0 myWarning() << "Remove debug from amazonfetcher.cpp"; QFile f(QString::fromLatin1("/tmp/test%1.json").arg(m_page)); if(f.open(QIODevice::WriteOnly)) { QTextStream t(&f); t.setCodec("UTF-8"); t << data; } f.close(); #endif QJsonParseError jsonError; QJsonObject databject = QJsonDocument::fromJson(data, &jsonError).object(); if(jsonError.error != QJsonParseError::NoError) { myDebug() << "AmazonFetcher: JSON error -" << jsonError.errorString(); message(jsonError.errorString(), MessageHandler::Error); stop(); return; } QJsonObject resultObject = databject.value(QStringLiteral("SearchResult")).toObject(); if(resultObject.isEmpty()) { resultObject = databject.value(QStringLiteral("ItemsResult")).toObject(); } if(m_total == -1) { int totalResults = resultObject.value(QStringLiteral("TotalResultCount")).toInt(); if(totalResults > 0) { m_total = totalResults; // myDebug() << "Total results is" << totalResults; } } QStringList errors; QJsonValue errorValue = databject.value(QLatin1String("Errors")); if(!errorValue.isNull()) { foreach(const QJsonValue& error, errorValue.toArray()) { errors += error.toObject().value(QLatin1String("Message")).toString(); } } if(!errors.isEmpty()) { for(QStringList::ConstIterator it = errors.constBegin(); it != errors.constEnd(); ++it) { myDebug() << "AmazonFetcher::" << *it; } message(errors[0], MessageHandler::Error); stop(); return; } Data::CollPtr coll = createCollection(); if(!coll) { myDebug() << "no collection pointer"; stop(); return; } int count = -1; foreach(const QJsonValue& item, resultObject.value(QLatin1String("Items")).toArray()) { ++count; if(m_numResults >= m_limit) { break; } if(!m_started) { // might get aborted break; } Data::EntryPtr entry(new Data::Entry(coll)); populateEntry(entry, item.toObject()); // special case book author // amazon is really bad about not putting spaces after periods if(coll->type() == Data::Collection::Book) { QRegExp rx(QLatin1String("\\.([^\\s])")); QStringList values = FieldFormat::splitValue(entry->field(QStringLiteral("author"))); for(QStringList::Iterator it = values.begin(); it != values.end(); ++it) { (*it).replace(rx, QStringLiteral(". \\1")); } entry->setField(QStringLiteral("author"), values.join(FieldFormat::delimiterString())); } // UK puts the year in the title for some reason if(m_site == UK && coll->type() == Data::Collection::Video) { QRegExp rx(QLatin1String("\\[(\\d{4})\\]")); QString t = entry->title(); if(rx.indexIn(t) > -1) { QString y = rx.cap(1); t = t.remove(rx).simplified(); entry->setField(QStringLiteral("title"), t); if(entry->field(QStringLiteral("year")).isEmpty()) { entry->setField(QStringLiteral("year"), y); } } } // myDebug() << entry->title(); FetchResult* r = new FetchResult(Fetcher::Ptr(this), entry); m_entries.insert(r->uid, entry); emit signalResultFound(r); ++m_numResults; } // we might have gotten aborted if(!m_started) { return; } // are there any additional results to get? m_hasMoreResults = m_testResultsFile.isEmpty() && (m_page * AMAZON_RETURNS_PER_REQUEST < m_total); const int currentTotal = qMin(m_total, m_limit); if(m_testResultsFile.isEmpty() && (m_page * AMAZON_RETURNS_PER_REQUEST < currentTotal)) { int foundCount = (m_page-1) * AMAZON_RETURNS_PER_REQUEST + coll->entryCount(); message(i18n("Results from %1: %2/%3", source(), foundCount, m_total), MessageHandler::Status); ++m_page; m_countOffset = 0; doSearch(); } else if(request().value.count(QLatin1Char(';')) > 9) { // start new request after cutting off first 10 isbn values FetchRequest newRequest = request(); newRequest.value = request().value.section(QLatin1Char(';'), 10); startSearch(newRequest); } else { m_countOffset = m_entries.count() % AMAZON_RETURNS_PER_REQUEST; if(m_countOffset == 0) { ++m_page; // need to go to next page } stop(); } } Tellico::Data::EntryPtr AmazonFetcher::fetchEntryHook(uint uid_) { Data::EntryPtr entry = m_entries[uid_]; if(!entry) { myWarning() << "no entry in dict"; return entry; } // do what we can to remove useless keywords const int type = collectionType(); switch(type) { case Data::Collection::Book: case Data::Collection::ComicBook: case Data::Collection::Bibtex: if(optionalFields().contains(QStringLiteral("keyword"))) { - StringSet newWords; + QStringList newWords; const QStringList keywords = FieldFormat::splitValue(entry->field(QStringLiteral("keyword"))); foreach(const QString& keyword, keywords) { if(keyword == QLatin1String("General") || keyword == QLatin1String("Subjects") || keyword == QLatin1String("Par prix") || // french stuff keyword == QLatin1String("Divers") || // french stuff keyword.startsWith(QLatin1Char('(')) || keyword.startsWith(QLatin1String("Authors"))) { continue; } - newWords.add(keyword); + newWords += keyword; } - entry->setField(QStringLiteral("keyword"), newWords.values().join(FieldFormat::delimiterString())); + newWords.removeDuplicates(); + entry->setField(QStringLiteral("keyword"), newWords.join(FieldFormat::delimiterString())); } entry->setField(QStringLiteral("comments"), Tellico::decodeHTML(entry->field(QStringLiteral("comments")))); break; case Data::Collection::Video: { const QString genres = QStringLiteral("genre"); QStringList oldWords = FieldFormat::splitValue(entry->field(genres)); - StringSet words; + QStringList newWords; // only care about genres that have "Genres" in the amazon response // and take the first word after that for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) { if((*it).indexOf(QLatin1String("Genres")) == -1) { continue; } // the amazon2tellico stylesheet separates words with '/' QStringList nodes = (*it).split(QLatin1Char('/')); for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) { if(*it2 != QLatin1String("Genres")) { continue; } ++it2; if(it2 != nodes.end() && *it2 != QLatin1String("General")) { - words.add(*it2); + newWords += *it2; } break; // we're done } } - entry->setField(genres, words.values().join(FieldFormat::delimiterString())); + newWords.removeDuplicates(); + entry->setField(genres, newWords.join(FieldFormat::delimiterString())); // language tracks get duplicated, too - words.clear(); - words.add(FieldFormat::splitValue(entry->field(QStringLiteral("language")))); - entry->setField(QStringLiteral("language"), words.values().join(FieldFormat::delimiterString())); + newWords = FieldFormat::splitValue(entry->field(QStringLiteral("language"))); + newWords.removeDuplicates(); + entry->setField(QStringLiteral("language"), newWords.join(FieldFormat::delimiterString())); } entry->setField(QStringLiteral("plot"), Tellico::decodeHTML(entry->field(QStringLiteral("plot")))); break; case Data::Collection::Album: { const QString genres = QStringLiteral("genre"); QStringList oldWords = FieldFormat::splitValue(entry->field(genres)); - StringSet words; + QStringList newWords; // only care about genres that have "Styles" in the amazon response // and take the first word after that for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) { if((*it).indexOf(QLatin1String("Styles")) == -1) { continue; } // the amazon2tellico stylesheet separates words with '/' QStringList nodes = (*it).split(QLatin1Char('/')); bool isStyle = false; for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) { if(!isStyle) { if(*it2 == QLatin1String("Styles")) { isStyle = true; } continue; } if(*it2 != QLatin1String("General")) { - words.add(*it2); + newWords += *it2; } } } - entry->setField(genres, words.values().join(FieldFormat::delimiterString())); + newWords.removeDuplicates(); + entry->setField(genres, newWords.join(FieldFormat::delimiterString())); } entry->setField(QStringLiteral("comments"), Tellico::decodeHTML(entry->field(QStringLiteral("comments")))); break; case Data::Collection::Game: entry->setField(QStringLiteral("description"), Tellico::decodeHTML(entry->field(QStringLiteral("description")))); break; } // clean up the title parseTitle(entry); // also sometimes table fields have rows but no values Data::FieldList fields = entry->collection()->fields(); QRegExp blank(QLatin1String("[\\s") + FieldFormat::columnDelimiterString() + FieldFormat::delimiterString() + QLatin1String("]+")); // only white space, column separators and value separators foreach(Data::FieldPtr fIt, fields) { if(fIt->type() != Data::Field::Table) { continue; } if(blank.exactMatch(entry->field(fIt))) { entry->setField(fIt, QString()); } } // don't want to show image urls in the fetch dialog // so clear them after reading the URL QString imageURL; switch(m_imageSize) { case SmallImage: imageURL = entry->field(QStringLiteral("small-image")); entry->setField(QStringLiteral("small-image"), QString()); break; case MediumImage: imageURL = entry->field(QStringLiteral("medium-image")); entry->setField(QStringLiteral("medium-image"), QString()); break; case LargeImage: imageURL = entry->field(QStringLiteral("large-image")); entry->setField(QStringLiteral("large-image"), QString()); break; case NoImage: default: break; } if(!imageURL.isEmpty()) { // myDebug() << "grabbing " << imageURL; QString id = ImageFactory::addImage(QUrl::fromUserInput(imageURL), true); if(id.isEmpty()) { message(i18n("The cover image could not be loaded."), MessageHandler::Warning); } else { // amazon serves up 1x1 gifs occasionally, but that's caught in the image constructor // all relevant collection types have cover fields entry->setField(QStringLiteral("cover"), id); } } return entry; } Tellico::Fetch::FetchRequest AmazonFetcher::updateRequest(Data::EntryPtr entry_) { const int type = entry_->collection()->type(); const QString t = entry_->field(QStringLiteral("title")); if(type == Data::Collection::Book || type == Data::Collection::ComicBook || type == Data::Collection::Bibtex) { const QString isbn = entry_->field(QStringLiteral("isbn")); if(!isbn.isEmpty()) { return FetchRequest(Fetch::ISBN, isbn); } const QString a = entry_->field(QStringLiteral("author")); if(!a.isEmpty()) { return t.isEmpty() ? FetchRequest(Fetch::Person, a) : FetchRequest(Fetch::Keyword, t + QLatin1Char('-') + a); } } else if(type == Data::Collection::Album) { const QString a = entry_->field(QStringLiteral("artist")); if(!a.isEmpty()) { return t.isEmpty() ? FetchRequest(Fetch::Person, a) : FetchRequest(Fetch::Keyword, t + QLatin1Char('-') + a); } } // optimistically try searching for title and rely on Collection::sameEntry() to figure things out if(!t.isEmpty()) { return FetchRequest(Fetch::Title, t); } return FetchRequest(); } QByteArray AmazonFetcher::requestPayload(FetchRequest request_) { QJsonObject payload; payload.insert(QLatin1String("PartnerTag"), m_assoc); payload.insert(QLatin1String("PartnerType"), QLatin1String("Associates")); payload.insert(QLatin1String("Operation"), QLatin1String("SearchItems")); payload.insert(QLatin1String("SortBy"), QLatin1String("Relevance")); // not mandatory // payload.insert(QLatin1String("Marketplace"), QLatin1String(siteData(m_site).host)); if(m_page > 1) { payload.insert(QLatin1String("ItemPage"), m_page); } QJsonArray resources; resources.append(QLatin1String("ItemInfo.Title")); resources.append(QLatin1String("ItemInfo.ContentInfo")); resources.append(QLatin1String("ItemInfo.ByLineInfo")); resources.append(QLatin1String("ItemInfo.TechnicalInfo")); const int type = request_.collectionType; switch(type) { case Data::Collection::Book: case Data::Collection::ComicBook: case Data::Collection::Bibtex: payload.insert(QLatin1String("SearchIndex"), QLatin1String("Books")); resources.append(QLatin1String("ItemInfo.ExternalIds")); resources.append(QLatin1String("ItemInfo.ManufactureInfo")); break; case Data::Collection::Album: payload.insert(QLatin1String("SearchIndex"), QLatin1String("Music")); break; case Data::Collection::Video: // CA and JP appear to have a bug where Video only returns VHS or Music results // DVD will return DVD, Blu-ray, etc. so just ignore VHS for those users payload.insert(QLatin1String("SearchIndex"), QLatin1String("MoviesAndTV")); if(m_site == CA || m_site == JP || m_site == IT || m_site == ES) { payload.insert(QStringLiteral("SearchIndex"), QStringLiteral("DVD")); } else { payload.insert(QStringLiteral("SearchIndex"), QStringLiteral("Video")); } // params.insert(QStringLiteral("SortIndex"), QStringLiteral("relevancerank")); resources.append(QLatin1String("ItemInfo.ContentRating")); break; case Data::Collection::Game: payload.insert(QLatin1String("SearchIndex"), QLatin1String("VideoGames")); break; case Data::Collection::BoardGame: payload.insert(QLatin1String("SearchIndex"), QLatin1String("ToysAndGames")); // params.insert(QStringLiteral("SortIndex"), QStringLiteral("relevancerank")); break; case Data::Collection::Coin: case Data::Collection::Stamp: case Data::Collection::Wine: case Data::Collection::Base: case Data::Collection::Card: myDebug() << "can't fetch this type:" << collectionType(); return QByteArray(); } switch(request_.key) { case Title: payload.insert(QLatin1String("Title"), request_.value); break; case Person: if(type == Data::Collection::Video) { payload.insert(QStringLiteral("Actor"), request_.value); // payload.insert(QStringLiteral("Director"), request_.value); } else if(type == Data::Collection::Album) { payload.insert(QStringLiteral("Artist"), request_.value); } else if(type == Data::Collection::Book) { payload.insert(QLatin1String("Author"), request_.value); } else { payload.insert(QLatin1String("Keywords"), request_.value); } break; case ISBN: { QString cleanValue = request_.value; cleanValue.remove(QLatin1Char('-')); // ISBN only get digits or 'X' QStringList isbns = FieldFormat::splitValue(cleanValue); // Amazon isbn13 search is still very flaky, so if possible, we're going to convert // all of them to isbn10. If we run into a 979 isbn13, then we're forced to do an // isbn13 search bool isbn13 = false; for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ) { if((*it).startsWith(QLatin1String("979"))) { isbn13 = true; break; } ++it; } // if we want isbn10, then convert all if(!isbn13) { for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ++it) { if((*it).length() > 12) { (*it) = ISBNValidator::isbn10(*it); (*it).remove(QLatin1Char('-')); } } } // limit to first 10 while(isbns.size() > 10) { isbns.pop_back(); } payload.insert(QLatin1String("Keywords"), isbns.join(QLatin1String("|"))); if(isbn13) { // params.insert(QStringLiteral("IdType"), QStringLiteral("EAN")); } } break; case UPC: { QString cleanValue = request_.value; cleanValue.remove(QLatin1Char('-')); // for EAN values, add 0 to beginning if not 13 characters // in order to assume US country code from UPC value QStringList values; foreach(const QString& splitValue, cleanValue.split(FieldFormat::delimiterString())) { QString tmpValue = splitValue; if(m_site != US && tmpValue.length() == 12) { tmpValue.prepend(QLatin1Char('0')); } values << tmpValue; // limit to first 10 values if(values.length() >= 10) { break; } } payload.insert(QLatin1String("Keywords"), values.join(QLatin1String("|"))); } break; case Keyword: payload.insert(QLatin1String("Keywords"), request_.value); break; case Raw: { QString key = request_.value.section(QLatin1Char('='), 0, 0).trimmed(); QString str = request_.value.section(QLatin1Char('='), 1).trimmed(); payload.insert(key, str); } break; default: myWarning() << "key not recognized: " << request().key; return QByteArray(); } switch(m_imageSize) { case SmallImage: resources.append(QLatin1String("Images.Primary.Small")); break; case MediumImage: resources.append(QLatin1String("Images.Primary.Medium")); break; case LargeImage: resources.append(QLatin1String("Images.Primary.Large")); break; case NoImage: break; } payload.insert(QLatin1String("Resources"), resources); return QJsonDocument(payload).toJson(QJsonDocument::Compact); } Tellico::Data::CollPtr AmazonFetcher::createCollection() { Data::CollPtr coll = CollectionFactory::collection(collectionType(), true); if(!coll) { return coll; } QString imageFieldName; switch(m_imageSize) { case SmallImage: imageFieldName = QStringLiteral("small-image"); break; case MediumImage: imageFieldName = QStringLiteral("medium-image"); break; case LargeImage: imageFieldName = QStringLiteral("large-image"); break; case NoImage: break; } if(!imageFieldName.isEmpty()) { Data::FieldPtr field(new Data::Field(imageFieldName, QString(), Data::Field::URL)); coll->addField(field); } if(optionalFields().contains(QStringLiteral("amazon"))) { Data::FieldPtr field(new Data::Field(QStringLiteral("amazon"), i18n("Amazon Link"), Data::Field::URL)); field->setCategory(i18n("General")); coll->addField(field); } return coll; } void AmazonFetcher::populateEntry(Data::EntryPtr entry_, const QJsonObject& info_) { QVariantMap itemMap = info_.value(QLatin1String("ItemInfo")).toObject().toVariantMap(); entry_->setField(QStringLiteral("title"), mapValue(itemMap, "Title", "DisplayValue")); const QString isbn = mapValue(itemMap, "ExternalIds", "ISBNs", "DisplayValues"); if(!isbn.isEmpty()) { // could be duplicate isbn10 and isbn13 values QStringList isbns = FieldFormat::splitValue(isbn, FieldFormat::StringSplit); for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ++it) { if((*it).length() > 12) { (*it) = ISBNValidator::isbn10(*it); (*it).remove(QLatin1Char('-')); } } isbns.removeDuplicates(); entry_->setField(QStringLiteral("isbn"), isbns.join(FieldFormat::delimiterString())); } QStringList actors, artists, authors, illustrators, publishers; QVariantMap byLineMap = itemMap.value(QLatin1String("ByLineInfo")).toMap(); QVariantList contribArray = byLineMap.value(QLatin1String("Contributors")).toList(); foreach(const QVariant& v, contribArray) { const QVariantMap contribMap = v.toMap(); const QString role = contribMap.value(QLatin1String("Role")).toString(); const QString name = contribMap.value(QLatin1String("Name")).toString(); if(role == QLatin1String("Actor")) { actors += name; } else if(role == QLatin1String("Artist")) { artists += name; } else if(role == QLatin1String("Author")) { authors += name; } else if(role == QLatin1String("Illustrator")) { illustrators += name; } else if(role == QLatin1String("Publisher")) { publishers += name; } } // assume for books that the manufacturer is the publishers if(collectionType() == Data::Collection::Book || collectionType() == Data::Collection::Bibtex || collectionType() == Data::Collection::ComicBook) { const QString manufacturer = byLineMap.value(QLatin1String("Manufacturer")).toMap() .value(QLatin1String("DisplayValue")).toString(); publishers += manufacturer; } actors.removeDuplicates(); artists.removeDuplicates(); authors.removeDuplicates(); illustrators.removeDuplicates(); publishers.removeDuplicates(); if(!actors.isEmpty()) { entry_->setField(QStringLiteral("cast"), actors.join(FieldFormat::delimiterString())); } if(!artists.isEmpty()) { entry_->setField(QStringLiteral("artist"), artists.join(FieldFormat::delimiterString())); } if(!authors.isEmpty()) { entry_->setField(QStringLiteral("author"), authors.join(FieldFormat::delimiterString())); } if(!illustrators.isEmpty()) { entry_->setField(QStringLiteral("illustrator"), illustrators.join(FieldFormat::delimiterString())); } if(!publishers.isEmpty()) { entry_->setField(QStringLiteral("publisher"), publishers.join(FieldFormat::delimiterString())); } QVariantMap contentMap = itemMap.value(QLatin1String("ContentInfo")).toMap(); entry_->setField(QStringLiteral("edition"), mapValue(contentMap, "Edition", "DisplayValue")); entry_->setField(QStringLiteral("pages"), mapValue(contentMap, "PagesCount", "DisplayValue")); const QString pubDate = mapValue(contentMap, "PublicationDate", "DisplayValue"); if(!pubDate.isEmpty()) { entry_->setField(QStringLiteral("pub_year"), pubDate.left(4)); } QVariantList langArray = itemMap.value(QLatin1String("ContentInfo")).toMap() .value(QStringLiteral("Languages")).toMap() .value(QStringLiteral("DisplayValues")).toList(); QStringList langs; foreach(const QVariant& v, langArray) { langs += mapValue(v.toMap(), "DisplayValue"); } langs.removeDuplicates(); langs.removeAll(QString()); entry_->setField(QStringLiteral("language"), langs.join(FieldFormat::delimiterString())); if(collectionType() == Data::Collection::Book || collectionType() == Data::Collection::Bibtex || collectionType() == Data::Collection::ComicBook) { QVariantMap classificationsMap = itemMap.value(QLatin1String("Classifications")).toMap(); QVariantMap technicalMap = itemMap.value(QLatin1String("TechnicalInfo")).toMap(); QString binding = mapValue(classificationsMap, "Binding", "DisplayValue"); if(binding.isEmpty()) { binding = mapValue(technicalMap, "Formats", "DisplayValues"); } if(binding.contains(QStringLiteral("Paperback")) && binding != QStringLiteral("Trade Paperback")) { binding = i18n("Paperback"); } else if(binding.contains(QStringLiteral("Hard"))) { // could be Hardcover or Hardback binding = i18n("Hardback"); } entry_->setField(QStringLiteral("binding"), binding); } QVariantMap imagesMap = info_.value(QLatin1String("Images")).toObject().toVariantMap(); switch(m_imageSize) { case SmallImage: entry_->setField(QStringLiteral("small-image"), mapValue(imagesMap, "Primary", "Small", "URL")); break; case MediumImage: entry_->setField(QStringLiteral("medium-image"), mapValue(imagesMap, "Primary", "Medium", "URL")); break; case LargeImage: entry_->setField(QStringLiteral("large-image"), mapValue(imagesMap, "Primary", "Large", "URL")); break; case NoImage: break; } if(optionalFields().contains(QStringLiteral("amazon"))) { entry_->setField(QStringLiteral("amazon"), mapValue(info_.toVariantMap(), "DetailPageURL")); } } void AmazonFetcher::parseTitle(Tellico::Data::EntryPtr entry_) { // assume that everything in brackets or parentheses is extra static const QRegularExpression rx(QLatin1String("[\\(\\[](.*?)[\\)\\]]")); QString title = entry_->field(QStringLiteral("title")); int pos = 0; QRegularExpressionMatch match = rx.match(title, pos); while(match.hasMatch()) { pos = match.capturedStart(); if(parseTitleToken(entry_, match.captured(1))) { title.remove(match.capturedStart(), match.capturedLength()); --pos; // search again there } match = rx.match(title, pos+1); } entry_->setField(QStringLiteral("title"), title.simplified()); } bool AmazonFetcher::parseTitleToken(Tellico::Data::EntryPtr entry_, const QString& token_) { // myDebug() << "title token:" << token_; // if res = true, then the token gets removed from the title bool res = false; if(token_.indexOf(QLatin1String("widescreen"), 0, Qt::CaseInsensitive) > -1 || token_.indexOf(i18n("Widescreen"), 0, Qt::CaseInsensitive) > -1) { entry_->setField(QStringLiteral("widescreen"), QStringLiteral("true")); // res = true; leave it in the title } else if(token_.indexOf(QLatin1String("full screen"), 0, Qt::CaseInsensitive) > -1) { // skip, but go ahead and remove from title res = true; } else if(token_.indexOf(QLatin1String("import"), 0, Qt::CaseInsensitive) > -1) { // skip, but go ahead and remove from title res = true; } if(token_.indexOf(QLatin1String("blu-ray"), 0, Qt::CaseInsensitive) > -1) { entry_->setField(QStringLiteral("medium"), i18n("Blu-ray")); res = true; } else if(token_.indexOf(QLatin1String("hd dvd"), 0, Qt::CaseInsensitive) > -1) { entry_->setField(QStringLiteral("medium"), i18n("HD DVD")); res = true; } else if(token_.indexOf(QLatin1String("vhs"), 0, Qt::CaseInsensitive) > -1) { entry_->setField(QStringLiteral("medium"), i18n("VHS")); res = true; } if(token_.indexOf(QLatin1String("director's cut"), 0, Qt::CaseInsensitive) > -1 || token_.indexOf(i18n("Director's Cut"), 0, Qt::CaseInsensitive) > -1) { entry_->setField(QStringLiteral("directors-cut"), QStringLiteral("true")); // res = true; leave it in the title } if(token_.toLower() == QLatin1String("ntsc")) { entry_->setField(QStringLiteral("format"), i18n("NTSC")); res = true; } if(token_.toLower() == QLatin1String("dvd")) { entry_->setField(QStringLiteral("medium"), i18n("DVD")); res = true; } if(token_.indexOf(QLatin1String("series"), 0, Qt::CaseInsensitive) > -1) { entry_->setField(QStringLiteral("series"), token_); res = true; } static const QRegularExpression regionRx(QLatin1String("Region [1-9]")); QRegularExpressionMatch match = regionRx.match(token_); if(match.hasMatch()) { entry_->setField(QStringLiteral("region"), i18n(match.captured().toUtf8().constData())); res = true; } if(entry_->collection()->type() == Data::Collection::Game) { Data::FieldPtr f = entry_->collection()->fieldByName(QStringLiteral("platform")); if(f && f->allowed().contains(token_)) { res = true; } } return res; } //static QString AmazonFetcher::defaultName() { return i18n("Amazon.com Web Services"); } QString AmazonFetcher::defaultIcon() { return favIcon("http://www.amazon.com"); } Tellico::StringHash AmazonFetcher::allOptionalFields() { StringHash hash; hash[QStringLiteral("keyword")] = i18n("Keywords"); hash[QStringLiteral("amazon")] = i18n("Amazon Link"); return hash; } Tellico::Fetch::ConfigWidget* AmazonFetcher::configWidget(QWidget* parent_) const { return new AmazonFetcher::ConfigWidget(parent_, this); } AmazonFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AmazonFetcher* fetcher_/*=0*/) : Fetch::ConfigWidget(parent_) { QGridLayout* l = new QGridLayout(optionsWidget()); l->setSpacing(4); l->setColumnStretch(1, 10); int row = -1; QLabel* al = new QLabel(i18n("Registration is required for accessing the %1 data source. " "If you agree to the terms and conditions, sign " "up for an account, and enter your information below.", AmazonFetcher::defaultName(), QLatin1String("https://affiliate-program.amazon.com/gp/flex/advertising/api/sign-in.html")), optionsWidget()); al->setOpenExternalLinks(true); al->setWordWrap(true); ++row; l->addWidget(al, row, 0, 1, 2); // richtext gets weird with size al->setMinimumWidth(al->sizeHint().width()); QLabel* label = new QLabel(i18n("Access key: "), optionsWidget()); l->addWidget(label, ++row, 0); m_accessEdit = new QLineEdit(optionsWidget()); connect(m_accessEdit, &QLineEdit::textChanged, this, &ConfigWidget::slotSetModified); l->addWidget(m_accessEdit, row, 1); QString w = i18n("Access to data from Amazon.com requires an AWS Access Key ID and a Secret Key."); label->setWhatsThis(w); m_accessEdit->setWhatsThis(w); label->setBuddy(m_accessEdit); label = new QLabel(i18n("Secret key: "), optionsWidget()); l->addWidget(label, ++row, 0); m_secretKeyEdit = new QLineEdit(optionsWidget()); // m_secretKeyEdit->setEchoMode(QLineEdit::PasswordEchoOnEdit); connect(m_secretKeyEdit, &QLineEdit::textChanged, this, &ConfigWidget::slotSetModified); l->addWidget(m_secretKeyEdit, row, 1); label->setWhatsThis(w); m_secretKeyEdit->setWhatsThis(w); label->setBuddy(m_secretKeyEdit); label = new QLabel(i18n("Country: "), optionsWidget()); l->addWidget(label, ++row, 0); m_siteCombo = new GUI::ComboBox(optionsWidget()); for(int i = 0; i < XX; ++i) { const AmazonFetcher::SiteData& siteData = AmazonFetcher::siteData(i); QIcon icon(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("kf5/locale/countries/%1/flag.png").arg(siteData.country))); m_siteCombo->addItem(icon, siteData.countryName, i); m_siteCombo->model()->sort(0); } void (GUI::ComboBox::* activatedInt)(int) = &GUI::ComboBox::activated; connect(m_siteCombo, activatedInt, this, &ConfigWidget::slotSetModified); connect(m_siteCombo, activatedInt, this, &ConfigWidget::slotSiteChanged); l->addWidget(m_siteCombo, row, 1); w = i18n("Amazon.com provides data from several different localized sites. Choose the one " "you wish to use for this data source."); label->setWhatsThis(w); m_siteCombo->setWhatsThis(w); label->setBuddy(m_siteCombo); label = new QLabel(i18n("&Image size: "), optionsWidget()); l->addWidget(label, ++row, 0); m_imageCombo = new GUI::ComboBox(optionsWidget()); m_imageCombo->addItem(i18n("Small Image"), SmallImage); m_imageCombo->addItem(i18n("Medium Image"), MediumImage); m_imageCombo->addItem(i18n("Large Image"), LargeImage); m_imageCombo->addItem(i18n("No Image"), NoImage); connect(m_imageCombo, activatedInt, this, &ConfigWidget::slotSetModified); l->addWidget(m_imageCombo, row, 1); w = i18n("The cover image may be downloaded as well. However, too many large images in the " "collection may degrade performance."); label->setWhatsThis(w); m_imageCombo->setWhatsThis(w); label->setBuddy(m_imageCombo); label = new QLabel(i18n("&Associate's ID: "), optionsWidget()); l->addWidget(label, ++row, 0); m_assocEdit = new QLineEdit(optionsWidget()); void (QLineEdit::* textChanged)(const QString&) = &QLineEdit::textChanged; connect(m_assocEdit, textChanged, this, &ConfigWidget::slotSetModified); l->addWidget(m_assocEdit, row, 1); w = i18n("The associate's id identifies the person accessing the Amazon.com Web Services, and is included " "in any links to the Amazon.com site."); label->setWhatsThis(w); m_assocEdit->setWhatsThis(w); label->setBuddy(m_assocEdit); l->setRowStretch(++row, 10); if(fetcher_) { m_siteCombo->setCurrentData(fetcher_->m_site); m_accessEdit->setText(fetcher_->m_accessKey); m_secretKeyEdit->setText(fetcher_->m_secretKey); m_assocEdit->setText(fetcher_->m_assoc); m_imageCombo->setCurrentData(fetcher_->m_imageSize); } else { // defaults m_assocEdit->setText(QLatin1String(AMAZON_ASSOC_TOKEN)); m_imageCombo->setCurrentData(MediumImage); } addFieldsWidget(AmazonFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); KAcceleratorManager::manage(optionsWidget()); } void AmazonFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) { int n = m_siteCombo->currentData().toInt(); config_.writeEntry("Site", n); QString s = m_accessEdit->text().trimmed(); if(!s.isEmpty()) { config_.writeEntry("AccessKey", s); } s = m_secretKeyEdit->text().trimmed(); if(!s.isEmpty()) { config_.writeEntry("SecretKey", s); } s = m_assocEdit->text().trimmed(); if(!s.isEmpty()) { config_.writeEntry("AssocToken", s); } n = m_imageCombo->currentData().toInt(); config_.writeEntry("Image Size", n); } QString AmazonFetcher::ConfigWidget::preferredName() const { return AmazonFetcher::siteData(m_siteCombo->currentData().toInt()).title; } void AmazonFetcher::ConfigWidget::slotSiteChanged() { emit signalName(preferredName()); } diff --git a/src/fetch/fetchmanager.cpp b/src/fetch/fetchmanager.cpp index 2d1ed17d..a51d9bcc 100644 --- a/src/fetch/fetchmanager.cpp +++ b/src/fetch/fetchmanager.cpp @@ -1,582 +1,582 @@ /*************************************************************************** Copyright (C) 2003-2009 Robby Stephenson ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or * * modify it under the terms of the GNU General Public License as * * published by the Free Software Foundation; either version 2 of * * the License or (at your option) version 3 or any later version * * accepted by the membership of KDE e.V. (or its successor approved * * by the membership of KDE e.V.), which shall act as a proxy * * defined in Section 14 of version 3 of the license. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see . * * * ***************************************************************************/ #include #include "fetchmanager.h" #include "configwidget.h" #include "messagehandler.h" #include "../entry.h" #include "../collection.h" #include "../document.h" #include "../utils/string_utils.h" #include "../utils/tellico_utils.h" #include "../tellico_debug.h" #ifdef HAVE_YAZ #include "z3950fetcher.h" #endif #include "srufetcher.h" #include "execexternalfetcher.h" #include #include #include #include #include #include #include #define LOAD_ICON(name, group, size) \ KIconLoader::global()->loadIcon(name, static_cast(group), size_) using Tellico::Fetch::Manager; Manager* Manager::s_self = nullptr; Manager::Manager() : QObject(), m_currentFetcherIndex(-1), m_messager(new ManagerMessage()), m_count(0), m_loadDefaults(false) { // must create static pointer first Q_ASSERT(!s_self); s_self = this; // no need to load fetchers since the initializer does it for us // m_keyMap.insert(FetchFirst, QString()); m_keyMap.insert(Title, i18n("Title")); m_keyMap.insert(Person, i18n("Person")); m_keyMap.insert(ISBN, i18n("ISBN")); m_keyMap.insert(UPC, i18n("UPC/EAN")); m_keyMap.insert(Keyword, i18n("Keyword")); m_keyMap.insert(DOI, i18n("DOI")); m_keyMap.insert(ArxivID, i18n("arXiv ID")); m_keyMap.insert(PubmedID, i18n("PubMed ID")); m_keyMap.insert(LCCN, i18n("LCCN")); m_keyMap.insert(Raw, i18n("Raw Query")); // m_keyMap.insert(FetchLast, QString()); } Manager::~Manager() { delete m_messager; } void Manager::registerFunction(int type_, const FetcherFunction& func_) { functionRegistry.insert(type_, func_); } void Manager::loadFetchers() { m_fetchers.clear(); m_uuidHash.clear(); KSharedConfigPtr config = KSharedConfig::openConfig(); if(config->hasGroup(QStringLiteral("Data Sources"))) { KConfigGroup configGroup(config, QStringLiteral("Data Sources")); int nSources = configGroup.readEntry("Sources Count", 0); for(int i = 0; i < nSources; ++i) { QString group = QStringLiteral("Data Source %1").arg(i); Fetcher::Ptr f = createFetcher(config, group); if(f) { m_fetchers.append(f); f->setMessageHandler(m_messager); m_uuidHash.insert(f->uuid(), f); } } m_loadDefaults = false; } else { // add default sources m_fetchers = defaultFetchers(); m_loadDefaults = true; } } const Tellico::Fetch::FetcherVec& Manager::fetchers() const { return m_fetchers; } Tellico::Fetch::FetcherVec Manager::fetchers(int type_) { FetcherVec vec; foreach(Fetcher::Ptr fetcher, m_fetchers) { if(fetcher->canFetch(type_)) { vec.append(fetcher); } } return vec; } Tellico::Fetch::Fetcher::Ptr Manager::fetcherByUuid(const QString& uuid_) { return m_uuidHash.contains(uuid_) ? m_uuidHash[uuid_] : Fetcher::Ptr(); } Tellico::Fetch::KeyMap Manager::keyMap(const QString& source_) const { // an empty string means return all if(source_.isEmpty()) { return m_keyMap; } // assume there's only one fetcher match Fetcher::Ptr foundFetcher; foreach(Fetcher::Ptr fetcher, m_fetchers) { if(source_ == fetcher->source()) { foundFetcher = fetcher; break; } } if(!foundFetcher) { myWarning() << "no fetcher found!"; return KeyMap(); } KeyMap map; for(KeyMap::ConstIterator it = m_keyMap.constBegin(); it != m_keyMap.constEnd(); ++it) { if(foundFetcher->canSearch(it.key())) { map.insert(it.key(), it.value()); } } return map; } void Manager::startSearch(const QString& source_, Tellico::Fetch::FetchKey key_, const QString& value_) { if(value_.isEmpty()) { emit signalDone(); return; } FetchRequest request(Data::Document::self()->collection()->type(), key_, value_); // assume there's only one fetcher match int i = 0; m_currentFetcherIndex = -1; foreach(Fetcher::Ptr fetcher, m_fetchers) { if(source_ == fetcher->source()) { ++m_count; // Fetcher::search() might emit done(), so increment before calling search() connect(fetcher.data(), &Fetcher::signalResultFound, this, &Manager::signalResultFound); connect(fetcher.data(), &Fetcher::signalDone, this, &Manager::slotFetcherDone); fetcher->startSearch(request); m_currentFetcherIndex = i; break; } ++i; } } void Manager::continueSearch() { if(m_currentFetcherIndex < 0 || m_currentFetcherIndex >= static_cast(m_fetchers.count())) { myDebug() << "can't continue!"; emit signalDone(); return; } Fetcher::Ptr fetcher = m_fetchers[m_currentFetcherIndex]; if(fetcher && fetcher->hasMoreResults()) { ++m_count; connect(fetcher.data(), &Fetcher::signalResultFound, this, &Manager::signalResultFound); connect(fetcher.data(), &Fetcher::signalDone, this, &Manager::slotFetcherDone); fetcher->continueSearch(); } else { emit signalDone(); } } bool Manager::hasMoreResults() const { if(m_currentFetcherIndex < 0 || m_currentFetcherIndex >= static_cast(m_fetchers.count())) { return false; } Fetcher::Ptr fetcher = m_fetchers[m_currentFetcherIndex]; return fetcher && fetcher->hasMoreResults(); } void Manager::stop() { // DEBUG_LINE; foreach(Fetcher::Ptr fetcher, m_fetchers) { if(fetcher->isSearching()) { fetcher->stop(); fetcher->saveConfig(); } } if(m_count != 0) { myDebug() << "count should be 0!"; } m_count = 0; } void Manager::slotFetcherDone(Tellico::Fetch::Fetcher* fetcher_) { // myDebug() << (fetcher_ ? fetcher_->source() : QString()) << ":" << m_count; fetcher_->disconnect(); // disconnect all signals fetcher_->saveConfig(); --m_count; if(m_count <= 0) { emit signalDone(); } } bool Manager::canFetch() const { foreach(Fetcher::Ptr fetcher, m_fetchers) { if(fetcher->canFetch(Data::Document::self()->collection()->type())) { return true; } } return false; } Tellico::Fetch::Fetcher::Ptr Manager::createFetcher(KSharedConfigPtr config_, const QString& group_) { if(!config_->hasGroup(group_)) { myDebug() << "no config group for " << group_; return Fetcher::Ptr(); } KConfigGroup config(config_, group_); int fetchType = config.readEntry("Type", int(Fetch::Unknown)); if(fetchType == Fetch::Unknown) { myDebug() << "unknown type " << fetchType << ", skipping"; return Fetcher::Ptr(); } // special case: the BoardGameGeek fetcher was originally implemented as a Ruby script // now, it's available with an XML API, so prefer the new version // so check for fetcher version and switch to the XML if version is missing or lower if(fetchType == Fetch::ExecExternal && config.readPathEntry("ExecPath", QString()).endsWith(QLatin1String("boardgamegeek.rb"))) { KConfigGroup generalConfig(config_, QStringLiteral("General Options")); if(generalConfig.readEntry("FetchVersion", 0) < 1) { fetchType = Fetch::BoardGameGeek; generalConfig.writeEntry("FetchVersion", 1); } } // special case: the Bedetheque fetcher was originally implemented as a Python script // now, it's available as a builtin data source, so prefer the new version // so check for fetcher version and switch to the newer if version is missing or lower if(fetchType == Fetch::ExecExternal && config.readPathEntry("ExecPath", QString()).endsWith(QStringLiteral("bedetheque.py"))) { KConfigGroup generalConfig(config_, QStringLiteral("General Options")); if(generalConfig.readEntry("FetchVersion", 0) < 2) { fetchType = Fetch::Bedetheque; generalConfig.writeEntry("FetchVersion", 2); } } Fetcher::Ptr f; if(functionRegistry.contains(fetchType)) { f = functionRegistry.value(fetchType).create(this); f->readConfig(config, group_); } return f; } #define FETCHER_ADD(type) \ do { \ if(functionRegistry.contains(type)) { \ vec.append(functionRegistry.value(type).create(this)); \ } \ } while(false) // static Tellico::Fetch::FetcherVec Manager::defaultFetchers() { FetcherVec vec; vec.append(SRUFetcher::libraryOfCongress(this)); // books FETCHER_ADD(ISBNdb); FETCHER_ADD(OpenLibrary); FETCHER_ADD(GoogleBook); // comic books FETCHER_ADD(AnimeNfo); FETCHER_ADD(Bedetheque); FETCHER_ADD(ComicVine); // bibliographic FETCHER_ADD(Arxiv); FETCHER_ADD(GoogleScholar); FETCHER_ADD(BiblioShare); FETCHER_ADD(DBLP); FETCHER_ADD(HathiTrust); // music FETCHER_ADD(MusicBrainz); // video games FETCHER_ADD(TheGamesDB); FETCHER_ADD(IGDB); FETCHER_ADD(VNDB); FETCHER_ADD(VideoGameGeek); // board games FETCHER_ADD(BoardGameGeek); // movies FETCHER_ADD(TheMovieDB); FETCHER_ADD(IMDB); QStringList langs = QLocale().uiLanguages(); if(langs.first().contains(QLatin1Char('-'))) { // I'm not sure QT always include two-letter locale codes langs << langs.first().section(QLatin1Char('-'), 0, 0); } // only add IBS if user includes italian if(langs.contains(QStringLiteral("it"))) { FETCHER_ADD(IBS); } if(langs.contains(QStringLiteral("fr"))) { FETCHER_ADD(DVDFr); FETCHER_ADD(Allocine); } if(langs.contains(QStringLiteral("ru"))) { FETCHER_ADD(KinoPoisk); } if(langs.contains(QStringLiteral("ua"))) { FETCHER_ADD(KinoTeatr); } if(langs.contains(QStringLiteral("de"))) { FETCHER_ADD(Kino); } if(langs.contains(QStringLiteral("cn"))) { FETCHER_ADD(Douban); } if(langs.contains(QStringLiteral("dk"))) { FETCHER_ADD(DBC); } return vec; } #undef FETCHER_ADD Tellico::Fetch::FetcherVec Manager::createUpdateFetchers(int collType_) { if(m_loadDefaults) { return defaultFetchers(); } FetcherVec vec; KConfigGroup config(KSharedConfig::openConfig(), "Data Sources"); int nSources = config.readEntry("Sources Count", 0); for(int i = 0; i < nSources; ++i) { QString group = QStringLiteral("Data Source %1").arg(i); // needs the KConfig* Fetcher::Ptr fetcher = createFetcher(KSharedConfig::openConfig(), group); if(fetcher && fetcher->canFetch(collType_) && fetcher->canUpdate()) { vec.append(fetcher); } } return vec; } Tellico::Fetch::FetcherVec Manager::createUpdateFetchers(int collType_, Tellico::Fetch::FetchKey key_) { FetcherVec fetchers; // creates new fetchers FetcherVec allFetchers = createUpdateFetchers(collType_); foreach(Fetcher::Ptr fetcher, allFetchers) { if(fetcher->canSearch(key_)) { fetchers.append(fetcher); } } return fetchers; } Tellico::Fetch::Fetcher::Ptr Manager::createUpdateFetcher(int collType_, const QString& source_) { Fetcher::Ptr newFetcher; // creates new fetchers FetcherVec fetchers = createUpdateFetchers(collType_); foreach(Fetcher::Ptr fetcher, fetchers) { if(fetcher->source() == source_) { newFetcher = fetcher; break; } } return newFetcher; } void Manager::updateStatus(const QString& message_) { emit signalStatus(message_); } Tellico::Fetch::NameTypeMap Manager::nameTypeMap() { Fetch::NameTypeMap map; FunctionRegistry::const_iterator it = functionRegistry.constBegin(); while(it != functionRegistry.constEnd()) { map.insert(functionRegistry.value(it.key()).name(), static_cast(it.key())); ++it; } // now find all the scripts distributed with tellico QStringList files = Tellico::locateAllFiles(QStringLiteral("tellico/data-sources/*.spec")); foreach(const QString& file, files) { KConfig spec(file, KConfig::SimpleConfig); KConfigGroup specConfig(&spec, QString()); QString name = specConfig.readEntry("Name"); if(name.isEmpty()) { myDebug() << "no name for" << file; continue; } bool enabled = specConfig.readEntry("Enabled", true); if(!enabled || !bundledScriptHasExecPath(file, specConfig)) { // no available exec continue; } map.insert(name, ExecExternal); m_scriptMap.insert(name, file); } return map; } // called when creating a new fetcher Tellico::Fetch::ConfigWidget* Manager::configWidget(QWidget* parent_, Tellico::Fetch::Type type_, const QString& name_) { ConfigWidget* w = nullptr; if(functionRegistry.contains(type_)) { w = functionRegistry.value(type_).configWidget(parent_); } else { myWarning() << "no widget defined for type =" << type_; } if(w && type_ == ExecExternal) { if(!name_.isEmpty() && m_scriptMap.contains(name_)) { // bundledScriptHasExecPath() actually needs to write the exec path // back to the config so the configWidget can read it. But if the spec file // is not readable, that doesn't work. So work around it with a copy to a temp file QTemporaryFile tmpFile; tmpFile.open(); QUrl from = QUrl::fromLocalFile(m_scriptMap[name_]); QUrl to = QUrl::fromLocalFile(tmpFile.fileName()); // have to overwrite since QTemporaryFile already created it KIO::Job* job = KIO::file_copy(from, to, -1, KIO::Overwrite); if(!job->exec()) { myDebug() << job->errorString(); } KConfig spec(to.path(), KConfig::SimpleConfig); KConfigGroup specConfig(&spec, QString()); // pass actual location of spec file if(name_ == specConfig.readEntry("Name") && bundledScriptHasExecPath(m_scriptMap[name_], specConfig)) { w->readConfig(specConfig); } else { myWarning() << "Can't read config file for " << to.path(); } } } return w; } // static QString Manager::typeName(Tellico::Fetch::Type type_) { if(self()->functionRegistry.contains(type_)) { return self()->functionRegistry.value(type_).name(); } myWarning() << "none found for" << type_; return QString(); } QPixmap Manager::fetcherIcon(Tellico::Fetch::Fetcher::Ptr fetcher_, int group_, int size_) { if(fetcher_->type() == Fetch::Z3950) { #ifdef HAVE_YAZ const Fetch::Z3950Fetcher* f = static_cast(fetcher_.data()); QUrl u; u.setScheme(QStringLiteral("http")); u.setHost(f->host()); QString icon = Fetcher::favIcon(u); if(!icon.isEmpty()) { return LOAD_ICON(icon, group_, size_); } #endif } else if(fetcher_->type() == Fetch::ExecExternal) { const Fetch::ExecExternalFetcher* f = static_cast(fetcher_.data()); const QString p = f->execPath(); QUrl u; if(p.contains(QStringLiteral("allocine"))) { u = QUrl(QStringLiteral("http://www.allocine.fr")); } else if(p.contains(QStringLiteral("ministerio_de_cultura"))) { u = QUrl(QStringLiteral("http://www.mcu.es")); } else if(p.contains(QStringLiteral("dark_horse_comics"))) { u = QUrl(QStringLiteral("http://www.darkhorse.com")); } else if(p.contains(QStringLiteral("boardgamegeek"))) { u = QUrl(QStringLiteral("http://www.boardgamegeek.com")); } else if(p.contains(QStringLiteral("supercat"))) { u = QUrl(QStringLiteral("https://evergreen-ils.org")); } else if(f->source().contains(QStringLiteral("amarok"), Qt::CaseInsensitive)) { return LOAD_ICON(QStringLiteral("amarok"), group_, size_); } if(!u.isEmpty() && u.isValid()) { QString icon = Fetcher::favIcon(u); if(!icon.isEmpty()) { return LOAD_ICON(icon, group_, size_); } } } return fetcherIcon(fetcher_->type(), group_, size_); } QPixmap Manager::fetcherIcon(Tellico::Fetch::Type type_, int group_, int size_) { QString name; if(self()->functionRegistry.contains(type_)) { name = self()->functionRegistry.value(type_).icon(); } else { myWarning() << "no pixmap defined for type =" << type_; } if(name.isEmpty()) { // use default tellico application icon name = QStringLiteral("tellico"); } QPixmap pix = KIconLoader::global()->loadIcon(name, static_cast(group_), size_, KIconLoader::DefaultState, QStringList(), nullptr, true); if(pix.isNull()) { QIcon icon = QIcon::fromTheme(name); const int groupSize = KIconLoader::global()->currentSize(static_cast(group_)); size_ = size_ == 0 ? groupSize : size_; pix = icon.pixmap(size_, size_); } if(pix.isNull()) { - pix = BarIcon(name); + pix = KIconLoader::global()->loadIcon(name, KIconLoader::Toolbar); } return pix; } Tellico::StringHash Manager::optionalFields(Type type_) { if(self()->functionRegistry.contains(type_)) { return self()->functionRegistry.value(type_).optionalFields(); } return StringHash(); } bool Manager::bundledScriptHasExecPath(const QString& specFile_, KConfigGroup& config_) { // make sure ExecPath is set and executable // for the bundled scripts, either the exec name is not set, in which case it is the // name of the spec file, minus the .spec, or the exec is set, and is local to the dir // if not, look for it QFileInfo specInfo(specFile_); QString exec = config_.readPathEntry("ExecPath", QString()); QFileInfo execInfo(exec); if(exec.isEmpty() || !execInfo.exists()) { exec = specInfo.canonicalPath() + QDir::separator() + specInfo.completeBaseName(); // remove ".spec" } else if(execInfo.isRelative()) { exec = specInfo.canonicalPath() + QDir::separator() + exec; } else if(!execInfo.isExecutable()) { myWarning() << "not executable:" << specFile_; return false; } execInfo.setFile(exec); if(!execInfo.exists() || !execInfo.isExecutable()) { myWarning() << "no exec file for" << specFile_; myWarning() << "exec =" << exec; return false; // we're not ok } config_.writePathEntry("ExecPath", exec); config_.sync(); // might be readonly, but that's ok return true; } diff --git a/src/fetch/imdbfetcher.cpp b/src/fetch/imdbfetcher.cpp index 4973a4c3..77a23e7d 100644 --- a/src/fetch/imdbfetcher.cpp +++ b/src/fetch/imdbfetcher.cpp @@ -1,1562 +1,1563 @@ /*************************************************************************** Copyright (C) 2004-2009 Robby Stephenson ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or * * modify it under the terms of the GNU General Public License as * * published by the Free Software Foundation; either version 2 of * * the License or (at your option) version 3 or any later version * * accepted by the membership of KDE e.V. (or its successor approved * * by the membership of KDE e.V.), which shall act as a proxy * * defined in Section 14 of version 3 of the license. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see . * * * ***************************************************************************/ #include "imdbfetcher.h" #include "../utils/guiproxy.h" #include "../collections/videocollection.h" #include "../entry.h" #include "../field.h" #include "../fieldformat.h" #include "../core/filehandler.h" #include "../images/imagefactory.h" #include "../utils/string_utils.h" #include "../tellico_debug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace { static const uint IMDB_MAX_RESULTS = 20; } using namespace Tellico; using Tellico::Fetch::IMDBFetcher; QRegExp* IMDBFetcher::s_tagRx = nullptr; QRegExp* IMDBFetcher::s_anchorRx = nullptr; QRegExp* IMDBFetcher::s_anchorTitleRx = nullptr; QRegExp* IMDBFetcher::s_anchorNameRx = nullptr; QRegExp* IMDBFetcher::s_titleRx = nullptr; // static void IMDBFetcher::initRegExps() { s_tagRx = new QRegExp(QStringLiteral("<.*>")); s_tagRx->setMinimal(true); s_anchorRx = new QRegExp(QStringLiteral("]*href\\s*=\\s*\"([^\"]+)\"[^<]*>([^<]+)"), Qt::CaseInsensitive); s_anchorRx->setMinimal(true); s_anchorTitleRx = new QRegExp(QStringLiteral("]*href\\s*=\\s*\"([^\"]*/title/[^\"]*)\"[^<]*>([^<]*)"), Qt::CaseInsensitive); s_anchorTitleRx->setMinimal(true); s_anchorNameRx = new QRegExp(QStringLiteral("]*href\\s*=\\s*\"([^\"]*/name/[^\"]*)\"[^<]*>(.+)"), Qt::CaseInsensitive); s_anchorNameRx->setMinimal(true); s_titleRx = new QRegExp(QStringLiteral("(.*)"), Qt::CaseInsensitive); s_titleRx->setMinimal(true); } // static const IMDBFetcher::LangData& IMDBFetcher::langData(int lang_) { Q_ASSERT(lang_ >= 0); Q_ASSERT(lang_ < 6); static LangData dataVector[6] = { { i18n("Internet Movie Database"), QStringLiteral("www.imdb.com"), QStringLiteral("findSectionHeader"), QStringLiteral("Exact Matches"), QStringLiteral("Partial Matches"), QStringLiteral("Approx Matches"), QStringLiteral("findSectionHeader"), QStringLiteral("Other Results"), QStringLiteral("aka"), QStringLiteral("Directed by"), QStringLiteral("Written by"), QStringLiteral("Produced by"), QStringLiteral("runtime.*(\\d+)\\s+min"), QStringLiteral("aspect ratio"), QStringLiteral("also known as"), QStringLiteral("Production Co"), QStringLiteral("cast"), QStringLiteral("cast overview"), QStringLiteral("credited cast"), QStringLiteral("episodes"), QStringLiteral("Genre"), QStringLiteral("Sound"), QStringLiteral("Color"), QStringLiteral("Language"), QStringLiteral("Certification"), QStringLiteral("Country"), QStringLiteral("plot\\s+(outline|summary)(?!/)") }, { i18n("Internet Movie Database (French)"), QStringLiteral("www.imdb.fr"), QStringLiteral("findSectionHeader"), QStringLiteral("Résultats Exacts"), QStringLiteral("Résultats Partiels"), QStringLiteral("Résultats Approximatif"), QStringLiteral("findSectionHeader"), QStringLiteral("Résultats Autres"), QStringLiteral("autre titre"), QStringLiteral("Réalisateur"), QStringLiteral("Scénarist"), QString(), QStringLiteral("Durée.*(\\d+)\\s+min"), QStringLiteral("Format"), QStringLiteral("Alias"), QStringLiteral("Sociétés de Production"), QStringLiteral("Ensemble"), QStringLiteral("cast overview"), // couldn't get phrase QStringLiteral("credited cast"), // couldn't get phrase QStringLiteral("episodes"), QStringLiteral("Genre"), QStringLiteral("Son"), QStringLiteral("Couleur"), QStringLiteral("Langue"), QStringLiteral("Classification"), QStringLiteral("Pays"), QStringLiteral("Intrigue\\s*") }, { i18n("Internet Movie Database (Spanish)"), QStringLiteral("www.imdb.es"), QStringLiteral("findSectionHeader"), QStringLiteral("Resultados Exactos"), QStringLiteral("Resultados Parciales"), QStringLiteral("Resultados Aproximados"), QStringLiteral("findSectionHeader"), QStringLiteral("Resultados Otros"), QStringLiteral("otro título"), QStringLiteral("Director"), QStringLiteral("Escritores"), QString(), QStringLiteral("Duración.*(\\d+)\\s+min"), QStringLiteral("Relación de Aspecto"), QStringLiteral("Conocido como"), QStringLiteral("Compañías Productores"), QStringLiteral("Reparto"), QStringLiteral("cast overview"), // couldn't get phrase QStringLiteral("credited cast"), // couldn't get phrase QStringLiteral("episodes"), QStringLiteral("Género"), QStringLiteral("Sonido"), QStringLiteral("Color"), QStringLiteral("Idioma"), QStringLiteral("Clasificación"), QStringLiteral("País"), QStringLiteral("Trama\\s*") }, { i18n("Internet Movie Database (German)"), QStringLiteral("www.imdb.de"), QStringLiteral("findSectionHeader"), QStringLiteral("genaue Übereinstimmung"), QStringLiteral("teilweise Übereinstimmung"), QStringLiteral("näherungsweise Übereinstimmung"), QStringLiteral("findSectionHeader"), QStringLiteral("andere Übereinstimmung"), QStringLiteral("andere titel"), QStringLiteral("Regisseur"), QStringLiteral("Drehbuchautoren"), QString(), QStringLiteral("Länge.*(\\d+)\\s+min"), QStringLiteral("Seitenverhältnis"), QStringLiteral("Auch bekannt als"), QStringLiteral("Produktionsfirmen"), QStringLiteral("Besetzung"), QStringLiteral("cast overview"), // couldn't get phrase QStringLiteral("credited cast"), // couldn't get phrase QStringLiteral("episodes"), QStringLiteral("Genre"), QStringLiteral("Tonverfahren"), QStringLiteral("Farbe"), QStringLiteral("Sprache"), QStringLiteral("Altersfreigabe"), QStringLiteral("Land"), QStringLiteral("Handlung\\s*") }, { i18n("Internet Movie Database (Italian)"), QStringLiteral("www.imdb.it"), QStringLiteral("findSectionHeader"), QStringLiteral("risultati esatti"), QStringLiteral("risultati parziali"), QStringLiteral("risultati approssimati"), QStringLiteral("findSectionHeader"), QStringLiteral("Resultados Otros"), QStringLiteral("otro título"), QStringLiteral("Regista"), QStringLiteral("Sceneggiatori"), QString(), QStringLiteral("Durata.*(\\d+)\\s+min"), QStringLiteral("Aspect Ratio"), QStringLiteral("Alias"), QStringLiteral("Società di produzione"), QStringLiteral("Cast"), QStringLiteral("cast overview"), // couldn't get phrase QStringLiteral("credited cast"), // couldn't get phrase QStringLiteral("episodes"), QStringLiteral("Genere"), QStringLiteral("Sonoro"), QStringLiteral("Colore"), QStringLiteral("Lingua"), QStringLiteral("Divieti"), QStringLiteral("Nazionalità"), QStringLiteral("Trama\\s*") }, { i18n("Internet Movie Database (Portuguese)"), QStringLiteral("www.imdb.pt"), QStringLiteral("findSectionHeader"), QStringLiteral("Exato"), QStringLiteral("Combinação Parcial"), QStringLiteral("Combinação Aproximada"), QStringLiteral("findSectionHeader"), QStringLiteral("Combinação Otros"), QStringLiteral("otro título"), QStringLiteral("Diretor"), QStringLiteral("Escritores"), QString(), QStringLiteral("Duração.*(\\d+)\\s+min"), QStringLiteral("Resolução"), QStringLiteral("Também Conhecido Como"), QStringLiteral("Companhias de Produção"), QStringLiteral("Elenco"), QStringLiteral("cast overview"), // couldn't get phrase QStringLiteral("credited cast"), // couldn't get phrase QStringLiteral("episodes"), QStringLiteral("Gênero"), QStringLiteral("Mixagem de Som"), QStringLiteral("Cor"), QStringLiteral("Lingua"), QStringLiteral("Certificação"), QStringLiteral("País"), QStringLiteral("Argumento\\s*") } }; return dataVector[qBound(0, lang_, static_cast(sizeof(dataVector)/sizeof(LangData)))]; } IMDBFetcher::IMDBFetcher(QObject* parent_) : Fetcher(parent_), m_job(nullptr), m_started(false), m_fetchImages(true), m_numCast(10), m_redirected(false), m_limit(IMDB_MAX_RESULTS), m_lang(EN), m_currentTitleBlock(Unknown), m_countOffset(0) { if(!s_tagRx) { initRegExps(); } m_host = langData(m_lang).siteHost; } IMDBFetcher::~IMDBFetcher() { } QString IMDBFetcher::source() const { return m_name.isEmpty() ? defaultName() : m_name; } bool IMDBFetcher::canFetch(int type) const { return type == Data::Collection::Video; } // imdb can search title only bool IMDBFetcher::canSearch(FetchKey k) const { return k == Title; } void IMDBFetcher::readConfigHook(const KConfigGroup& config_) { /* const int lang = config_.readEntry("Lang", int(EN)); m_lang = static_cast(lang); */ if(m_name.isEmpty()) { m_name = langData(m_lang).siteTitle; } QString h = config_.readEntry("Host"); if(h.isEmpty()) { m_host = langData(m_lang).siteHost; } else { m_host = h; } m_numCast = config_.readEntry("Max Cast", 10); m_fetchImages = config_.readEntry("Fetch Images", true); } // multiple values not supported void IMDBFetcher::search() { m_started = true; m_redirected = false; m_matches.clear(); m_popularTitles.clear(); m_exactTitles.clear(); m_partialTitles.clear(); m_currentTitleBlock = Unknown; m_countOffset = 0; m_url = QUrl(); m_url.setScheme(QStringLiteral("https")); m_url.setHost(m_host); m_url.setPath(QStringLiteral("/find")); // as far as I can tell, the url encoding should always be iso-8859-1? QUrlQuery q; q.addQueryItem(QStringLiteral("q"), request().value); switch(request().key) { case Title: q.addQueryItem(QStringLiteral("s"), QStringLiteral("tt")); m_url.setQuery(q); break; case Raw: m_url = QUrl(request().value); break; default: myWarning() << "not supported:" << request().key; stop(); return; } // myDebug() << m_url; m_job = KIO::storedGet(m_url, KIO::NoReload, KIO::HideProgressInfo); KJobWidgets::setWindow(m_job, GUI::Proxy::widget()); connect(m_job.data(), &KJob::result, this, &IMDBFetcher::slotComplete); connect(m_job.data(), &KIO::TransferJob::redirection, this, &IMDBFetcher::slotRedirection); } void IMDBFetcher::continueSearch() { m_started = true; m_limit += IMDB_MAX_RESULTS; if(m_currentTitleBlock == Popular) { parseTitleBlock(m_popularTitles); // if the offset is 0, then we need to be looking at the next block m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular; } // current title block might have changed if(m_currentTitleBlock == Exact) { parseTitleBlock(m_exactTitles); m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact; } if(m_currentTitleBlock == Partial) { parseTitleBlock(m_partialTitles); m_currentTitleBlock = m_countOffset == 0 ? Approx : Partial; } if(m_currentTitleBlock == Approx) { parseTitleBlock(m_approxTitles); m_currentTitleBlock = m_countOffset == 0 ? Unknown : Approx; } stop(); } void IMDBFetcher::stop() { if(!m_started) { return; } if(m_job) { m_job->kill(); m_job = nullptr; } m_started = false; m_redirected = false; emit signalDone(this); } void IMDBFetcher::slotRedirection(KIO::Job*, const QUrl& toURL_) { m_url = toURL_; if(m_url.path().contains(QRegExp(QStringLiteral("/tt\\d+/$")))) { m_url.setPath(m_url.path() + QStringLiteral("reference")); } m_redirected = true; } void IMDBFetcher::slotComplete(KJob*) { if(m_job->error()) { m_job->uiDelegate()->showErrorMessage(); stop(); return; } m_text = Tellico::fromHtmlData(m_job->data(), "UTF-8"); if(m_text.isEmpty()) { myLog() << "No data returned"; stop(); return; } // see bug 319662. If fetcher is cancelled, job is killed // if the pointer is retained, it gets double-deleted m_job = nullptr; #if 0 myWarning() << "Remove debug from imdbfetcher.cpp for /tmp/testimdbresults.html"; QFile f(QString::fromLatin1("/tmp/testimdbresults.html")); if(f.open(QIODevice::WriteOnly)) { QTextStream t(&f); t.setCodec("UTF-8"); t << m_text; } f.close(); #endif // a single result was found if we got redirected switch(request().key) { case Title: if(m_redirected) { parseSingleTitleResult(); } else { parseMultipleTitleResults(); } break; case Raw: parseSingleTitleResult(); break; default: myWarning() << "skipping results"; break; } } void IMDBFetcher::parseSingleTitleResult() { s_titleRx->indexIn(Tellico::decodeHTML(m_text)); // split title at parenthesis const QString cap1 = s_titleRx->cap(1); int pPos = cap1.indexOf(QLatin1Char('(')); // FIXME: maybe remove parentheses here? FetchResult* r = new FetchResult(Fetcher::Ptr(this), pPos == -1 ? cap1 : cap1.left(pPos), pPos == -1 ? QString() : cap1.mid(pPos)); // IMDB returns different HTML for single title results and has a query in the url // clear the query so we download the "canonical" page for the title QUrl url(m_url); url.setQuery(QString()); m_matches.insert(r->uid, url); m_allMatches.insert(r->uid, url); emit signalResultFound(r); m_hasMoreResults = false; stop(); } void IMDBFetcher::parseMultipleTitleResults() { QString output = Tellico::decodeHTML(m_text); const LangData& data = langData(m_lang); // IMDb can return three title lists, popular, exact, and partial // the popular titles are in the first table int pos_popular = output.indexOf(data.title_popular, 0, Qt::CaseInsensitive); int pos_exact = output.indexOf(data.match_exact, qMax(pos_popular, 0), Qt::CaseInsensitive); int pos_partial = output.indexOf(data.match_partial, qMax(pos_exact, 0), Qt::CaseInsensitive); int pos_approx = output.indexOf(data.match_approx, qMax(pos_partial, 0), Qt::CaseInsensitive); int end_popular = pos_exact; // keep track of where to end if(end_popular == -1) { end_popular = pos_partial == -1 ? (pos_approx == -1 ? output.length() : pos_approx) : pos_partial; } int end_exact = pos_partial; // keep track of where to end if(end_exact == -1) { end_exact = pos_approx == -1 ? output.length() : pos_approx; } int end_partial = pos_approx; // keep track of where to end if(end_partial == -1) { end_partial = output.length(); } // if found popular matches if(pos_popular > -1) { m_popularTitles = output.mid(pos_popular, end_popular-pos_popular); } // if found exact matches if(pos_exact > -1) { m_exactTitles = output.mid(pos_exact, end_exact-pos_exact); } if(pos_partial > -1) { m_partialTitles = output.mid(pos_partial, end_partial-pos_partial); } if(pos_approx > -1) { m_approxTitles = output.mid(pos_approx); } parseTitleBlock(m_popularTitles); // if the offset is 0, then we need to be looking at the next block m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular; if(m_matches.size() < m_limit) { parseTitleBlock(m_exactTitles); m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact; } if(m_matches.size() < m_limit) { parseTitleBlock(m_partialTitles); m_currentTitleBlock = m_countOffset == 0 ? Approx : Partial; } if(m_matches.size() < m_limit) { parseTitleBlock(m_approxTitles); m_currentTitleBlock = m_countOffset == 0 ? Unknown : Approx; } if(m_matches.size() == 0) { myLog() << "no matches found."; } stop(); } void IMDBFetcher::parseTitleBlock(const QString& str_) { if(str_.isEmpty()) { m_countOffset = 0; return; } QRegExp akaRx(QStringLiteral("%1 (.*)(||indexIn(str_); while(m_started && start > -1) { // split title at parenthesis const QString cap1 = s_anchorTitleRx->cap(1); // the anchor url const QString cap2 = s_anchorTitleRx->cap(2).trimmed(); // the anchor text start += s_anchorTitleRx->matchedLength(); int pPos = cap2.indexOf(QLatin1Char('(')); // if it has parentheses, use that for description QString desc; if(pPos > -1) { int pPos2 = cap2.indexOf(QLatin1Char(')'), pPos+1); if(pPos2 > -1) { desc = cap2.mid(pPos+1, pPos2-pPos-1); } } else { // parenthesis might be outside anchor tag int end = s_anchorTitleRx->indexIn(str_, start); if(end == -1) { end = str_.length(); } const QString text = str_.mid(start, end-start); pPos = text.indexOf(QLatin1Char('(')); if(pPos > -1) { const int pNewLine = text.indexOf(QStringLiteral(" -1 && (pNewLine == -1 || pPos < pNewLine)) { const int pPos2 = text.indexOf(QLatin1Char(')'), pPos); desc = text.mid(pPos+1, pPos2-pPos-1); } } pPos = -1; } } // multiple matches might have 'aka' info int end = s_anchorTitleRx->indexIn(str_, start+1); if(end == -1) { end = str_.length(); } int akaPos = akaRx.indexIn(str_, start+1); if(akaPos > -1 && akaPos < end) { // limit to 50 chars desc += QLatin1Char(' ') + akaRx.cap(1).trimmed().remove(*s_tagRx); if(desc.length() > 50) { desc = desc.left(50) + QStringLiteral("..."); } } start = s_anchorTitleRx->indexIn(str_, start); if(count < m_countOffset) { ++count; continue; } // if we got this far, then there is a valid result if(m_matches.size() >= m_limit) { m_hasMoreResults = true; break; } FetchResult* r = new FetchResult(Fetcher::Ptr(this), pPos == -1 ? cap2 : cap2.left(pPos), desc); QUrl u = QUrl(m_url).resolved(QUrl(cap1)); u.setQuery(QString()); m_matches.insert(r->uid, u); m_allMatches.insert(r->uid, u); emit signalResultFound(r); ++count; } if(!m_hasMoreResults && m_currentTitleBlock != Partial) { m_hasMoreResults = true; } m_countOffset = m_matches.size() < m_limit ? 0 : count; } Tellico::Data::EntryPtr IMDBFetcher::fetchEntryHook(uint uid_) { // if we already grabbed this one, then just pull it out of the dict Data::EntryPtr entry = m_entries[uid_]; if(entry) { return entry; } if(!m_matches.contains(uid_) && !m_allMatches.contains(uid_)) { myLog() << "no url found"; return Data::EntryPtr(); } QUrl url = m_matches.contains(uid_) ? m_matches[uid_] : m_allMatches[uid_]; if(url.path().contains(QRegExp(QStringLiteral("/tt\\d+/$")))) { url.setPath(url.path() + QStringLiteral("reference")); } QUrl origURL = m_url; // keep to switch back QString results; // if the url matches the current one, no need to redownload it if(url == m_url) { // myDebug() << "matches previous URL, no downloading needed."; results = Tellico::decodeHTML(m_text); } else { // now it's synchronous // be quiet about failure results = Tellico::fromHtmlData(FileHandler::readDataFile(url, true), "UTF-8"); m_url = url; // needed for processing #if 0 myWarning() << "Remove debug from imdbfetcher.cpp for /tmp/testimdbresult.html"; myDebug() << m_url; QFile f(QStringLiteral("/tmp/testimdbresult.html")); if(f.open(QIODevice::WriteOnly)) { QTextStream t(&f); t << results; } f.close(); #endif results = Tellico::decodeHTML(results); } if(results.isEmpty()) { myLog() << "no text results"; m_url = origURL; return Data::EntryPtr(); } entry = parseEntry(results); m_url = origURL; if(!entry) { myDebug() << "error in processing entry"; return Data::EntryPtr(); } m_entries.insert(uid_, entry); // keep for later return entry; } Tellico::Data::EntryPtr IMDBFetcher::parseEntry(const QString& str_) { Data::CollPtr coll(new Data::VideoCollection(true)); Data::EntryPtr entry(new Data::Entry(coll)); doTitle(str_, entry); doRunningTime(str_, entry); doAspectRatio(str_, entry); doAlsoKnownAs(str_, entry); doPlot(str_, entry, m_url); if(m_lang == EN) { doLists(str_, entry); } else { doLists2(str_, entry); } doStudio(str_, entry); doPerson(str_, entry, langData(m_lang).director, QStringLiteral("director")); doPerson(str_, entry, langData(m_lang).writer, QStringLiteral("writer")); doRating(str_, entry); doCast(str_, entry, m_url); if(m_fetchImages) { // needs base URL doCover(str_, entry, m_url); } const QString imdb = QStringLiteral("imdb"); if(!coll->hasField(imdb) && optionalFields().contains(imdb)) { Data::FieldPtr field(new Data::Field(imdb, i18n("IMDb Link"), Data::Field::URL)); field->setCategory(i18n("General")); coll->addField(field); } if(coll->hasField(imdb) && coll->fieldByName(imdb)->type() == Data::Field::URL) { m_url.setQuery(QString()); // we want to strip the "/reference" from the url QString url = m_url.url(); if(url.endsWith(QStringLiteral("/reference"))) { url = m_url.adjusted(QUrl::RemoveFilename).url(); } entry->setField(imdb, url); } return entry; } void IMDBFetcher::doTitle(const QString& str_, Tellico::Data::EntryPtr entry_) { if(s_titleRx->indexIn(str_) > -1) { const QString cap1 = s_titleRx->cap(1); // titles always have parentheses int pPos = cap1.indexOf(QLatin1Char('(')); QString title = cap1.left(pPos).trimmed(); // remove first and last quotes is there if(title.startsWith(QLatin1Char('"')) && title.endsWith(QLatin1Char('"'))) { title = title.mid(1, title.length()-2); } entry_->setField(QStringLiteral("title"), title); // now for movies with original non-english titles, the is english // but the page header is the original title. Grab the orig title QRegExp h3TitleRx(QStringLiteral("<h3[^>]+itemprop=\"name\"\\s*>(.*)<"), Qt::CaseInsensitive); h3TitleRx.setMinimal(true); if(h3TitleRx.indexIn(str_) > -1) { const QString h3Title = h3TitleRx.cap(1).trimmed(); if(h3Title != title) { // mis-matching titles. If the user has requested original title, // put it in origtitle field and keep english as title // otherwise replace if(optionalFields().contains(QStringLiteral("origtitle"))) { Data::FieldPtr f(new Data::Field(QStringLiteral("origtitle"), i18n("Original Title"))); f->setFormatType(FieldFormat::FormatTitle); entry_->collection()->addField(f); entry_->setField(QStringLiteral("origtitle"), h3Title); } else { entry_->setField(QStringLiteral("title"), h3Title); } } } // remove parentheses and extract year int pPos2 = pPos+1; while(pPos2 < cap1.length() && cap1[pPos2].isDigit()) { ++pPos2; } QString year = cap1.mid(pPos+1, pPos2-pPos-1); if(!year.isEmpty()) { entry_->setField(QStringLiteral("year"), year); } } } void IMDBFetcher::doRunningTime(const QString& str_, Tellico::Data::EntryPtr entry_) { // running time QRegExp runtimeRx(langData(m_lang).runtime, Qt::CaseInsensitive); runtimeRx.setMinimal(true); if(runtimeRx.indexIn(str_) > -1) { entry_->setField(QStringLiteral("running-time"), runtimeRx.cap(1)); } } void IMDBFetcher::doAspectRatio(const QString& str_, Tellico::Data::EntryPtr entry_) { QRegExp rx(QStringLiteral("%1.*([\\d\\.\\,]+\\s*:\\s*[\\d\\.\\,]+)").arg(langData(m_lang).aspect_ratio), Qt::CaseInsensitive); rx.setMinimal(true); if(rx.indexIn(str_) > -1) { entry_->setField(QStringLiteral("aspect-ratio"), rx.cap(1).trimmed()); } } void IMDBFetcher::doAlsoKnownAs(const QString& str_, Tellico::Data::EntryPtr entry_) { if(!optionalFields().contains(QStringLiteral("alttitle"))) { return; } // match until next b tag // QRegExp akaRx(QStringLiteral("also known as(.*)<b(?:\\s.*)?>")); QRegExp akaRx(QStringLiteral("%1(.*)(<a|<span)[>\\s/]").arg(langData(m_lang).also_known_as), Qt::CaseInsensitive); akaRx.setMinimal(true); if(akaRx.indexIn(str_) > -1 && !akaRx.cap(1).isEmpty()) { Data::FieldPtr f = entry_->collection()->fieldByName(QStringLiteral("alttitle")); if(!f) { f = new Data::Field(QStringLiteral("alttitle"), i18n("Alternative Titles"), Data::Field::Table); f->setFormatType(FieldFormat::FormatTitle); entry_->collection()->addField(f); } // split by </li> QStringList list = akaRx.cap(1).split(QStringLiteral("</li>")); // lang could be included with [fr] // const QRegExp parRx(QStringLiteral("\\(.+\\)")); const QRegExp brackRx(QStringLiteral("\\[\\w+\\]")); const QRegExp countryRx(QStringLiteral("\\s*\\(.+\\)\\s*$")); QStringList values; for(QStringList::Iterator it = list.begin(); it != list.end(); ++it) { QString s = *it; // sometimes, the word "more" gets linked to the releaseinfo page, check that if(s.contains(QStringLiteral("releaseinfo"))) { continue; } s.remove(*s_tagRx); s.remove(brackRx); // remove country s.remove(countryRx); s.remove(QLatin1Char('"')); s = s.trimmed(); // the first value ends up being or starting with the colon after "Also known as" // I'm too lazy to figure out a better regexp if(s.startsWith(QLatin1Char(':'))) { s = s.mid(1); s = s.trimmed(); } if(!s.isEmpty()) { values += s; } } if(!values.isEmpty()) { entry_->setField(QStringLiteral("alttitle"), values.join(FieldFormat::rowDelimiterString())); } // } else { // myLog() << "'Also Known As' not found"; } } void IMDBFetcher::doPlot(const QString& str_, Tellico::Data::EntryPtr entry_, const QUrl& baseURL_) { // plot summaries provided by users are on a separate page // should those be preferred? bool useUserSummary = false; // match until next <p> tag QString plotRxStr = langData(m_lang).plot + QStringLiteral("(.*)</(p|div|li)"); QRegExp plotRx(plotRxStr, Qt::CaseInsensitive); plotRx.setMinimal(true); QRegExp plotURLRx(QStringLiteral("<a\\s+.*href\\s*=\\s*\".*/title/.*/plotsummary\""), Qt::CaseInsensitive); plotURLRx.setMinimal(true); if(plotRx.indexIn(str_) > -1) { QString thisPlot = plotRx.cap(2); // if ends with "Written by", remove it. It has an em tag thisPlot.remove(QRegExp(QStringLiteral("<em class=\"nobr\".*</em>"))); thisPlot.remove(*s_tagRx); // remove HTML tags thisPlot = thisPlot.simplified(); // if thisPlot ends with (more) or contains // a url that ends with plotsummary, then we'll grab it, otherwise not if(plotRx.cap(0).endsWith(QStringLiteral("(more)</")) || plotURLRx.indexIn(plotRx.cap(0)) > -1 || thisPlot.isEmpty()) { useUserSummary = true; } else { entry_->setField(QStringLiteral("plot"), thisPlot); } } else { useUserSummary = true; } if(useUserSummary) { QRegExp idRx(QStringLiteral("title/(tt\\d+)")); idRx.indexIn(baseURL_.path()); QUrl plotURL = baseURL_; plotURL.setPath(QStringLiteral("/title/") + idRx.cap(1) + QStringLiteral("/plotsummary")); // be quiet about failure QString plotPage = Tellico::fromHtmlData(FileHandler::readDataFile(plotURL, true), "UTF-8"); if(!plotPage.isEmpty()) { QRegExp plotRx(QStringLiteral("id=\"plot-summaries-content\">(.*)</p")); plotRx.setMinimal(true); QRegExp plotRx2(QStringLiteral("<div\\s+id\\s*=\\s*\"swiki.2.1\">(.*)</d")); plotRx2.setMinimal(true); QString userPlot; if(plotRx.indexIn(plotPage) > -1) { userPlot = plotRx.cap(1); } else if(plotRx2.indexIn(plotPage) > -1) { userPlot = plotRx2.cap(1); } userPlot.remove(*s_tagRx); // remove HTML tags // remove last little "written by", if there userPlot.remove(QRegExp(QStringLiteral("\\s*written by.*$"), Qt::CaseInsensitive)); if(!userPlot.isEmpty()) { entry_->setField(QStringLiteral("plot"), Tellico::decodeHTML(userPlot.simplified())); } } } // myDebug() << "Plot:" << entry_->field(QStringLiteral("plot")); } void IMDBFetcher::doStudio(const QString& str_, Tellico::Data::EntryPtr entry_) { // match until next opening tag // QRegExp productionRx(langData(m_lang).studio, Qt::CaseInsensitive); QRegExp productionRx(langData(m_lang).studio); productionRx.setMinimal(true); QRegExp blackcatRx(QStringLiteral("blackcatheader"), Qt::CaseInsensitive); blackcatRx.setMinimal(true); const int pos1 = str_.indexOf(productionRx); if(pos1 == -1) { // myLog() << "No studio found"; return; } int pos2 = str_.indexOf(blackcatRx, pos1); if(pos2 == -1) { pos2 = str_.length(); } // stop matching when getting to Distributors int pos3 = str_.indexOf(QStringLiteral("Distributors"), pos1); if(pos3 > -1 && pos3 < pos2) { pos2 = pos3; } const QString text = str_.mid(pos1, pos2-pos1); const QString company = QStringLiteral("/company/"); QStringList studios; for(int pos = s_anchorRx->indexIn(text); pos > -1; pos = s_anchorRx->indexIn(text, pos+s_anchorRx->matchedLength())) { const QString cap1 = s_anchorRx->cap(1); if(cap1.contains(company)) { studios += s_anchorRx->cap(2).trimmed(); } } entry_->setField(QStringLiteral("studio"), studios.join(FieldFormat::delimiterString())); } void IMDBFetcher::doPerson(const QString& str_, Tellico::Data::EntryPtr entry_, const QString& imdbHeader_, const QString& fieldName_) { QRegExp br2Rx(QStringLiteral("<br[\\s/]*>\\s*<br[\\s/]*>"), Qt::CaseInsensitive); br2Rx.setMinimal(true); QRegExp divRx(QStringLiteral("<div\\s[^>]*class\\s*=\\s*\"(?:ipl-header__content|info|txt-block)\"[^>]*>(.*)</table"), Qt::CaseInsensitive); divRx.setMinimal(true); const QString name = QStringLiteral("/name/"); - StringSet people; + QStringList people; for(int pos = str_.indexOf(divRx); pos > -1; pos = str_.indexOf(divRx, pos+divRx.matchedLength())) { const QString infoBlock = divRx.cap(1); if(infoBlock.contains(imdbHeader_, Qt::CaseInsensitive)) { int pos2 = s_anchorRx->indexIn(infoBlock); while(pos2 > -1) { if(s_anchorRx->cap(1).contains(name)) { - people.add(s_anchorRx->cap(2).trimmed()); + people += s_anchorRx->cap(2).trimmed(); } pos2 = s_anchorRx->indexIn(infoBlock, pos2+s_anchorRx->matchedLength()); } break; } } if(!people.isEmpty()) { - entry_->setField(fieldName_, people.values().join(FieldFormat::delimiterString())); + people.removeDuplicates(); + entry_->setField(fieldName_, people.join(FieldFormat::delimiterString())); } } void IMDBFetcher::doCast(const QString& str_, Tellico::Data::EntryPtr entry_, const QUrl& baseURL_) { // the extended cast list is on a separate page // that's usually a lot of people // but since it can be in billing order, the main actors might not // be in the short list QRegExp idRx(QStringLiteral("title/(tt\\d+)")); idRx.indexIn(baseURL_.path()); QUrl castURL = baseURL_; castURL.setPath(QStringLiteral("/title/") + idRx.cap(1) + QStringLiteral("/fullcredits")); // be quiet about failure and be sure to translate entities const QString castPage = Tellico::decodeHTML(FileHandler::readTextFile(castURL, true)); #if 0 myWarning() << "Remove debug from imdbfetcher.cpp (/tmp/testimdbcast.html)"; QFile f(QString::fromLatin1("/tmp/testimdbcast.html")); if(f.open(QIODevice::WriteOnly)) { QTextStream t(&f); t << castPage; } f.close(); #endif const LangData& data = langData(m_lang); int pos = -1; // the text to search, depends on which page is being read QString castText = castPage; if(castText.isEmpty()) { // fall back to short list castText = str_; pos = castText.indexOf(data.cast1, 0, Qt::CaseInsensitive); if(pos == -1) { pos = castText.indexOf(data.cast2, 0, Qt::CaseInsensitive); } } else { // first look for anchor QRegExp castAnchorRx(QStringLiteral("<a\\s+name\\s*=\\s*\"cast\""), Qt::CaseInsensitive); pos = castAnchorRx.indexIn(castText); if(pos < 0) { QRegExp tableClassRx(QStringLiteral("<table\\s+class\\s*=\\s*\"cast_list\""), Qt::CaseInsensitive); pos = tableClassRx.indexIn(castText); if(pos < 0) { // fragile, the word "cast" appears in the title, but need to find // the one right above the actual cast table // for TV shows, there's a link on the sidebar for "episodes case" // so need to not match that one const QString castEnd = data.cast + QStringLiteral("</"); pos = castText.indexOf(castEnd, 0, Qt::CaseInsensitive); if(pos > 9) { // back up 9 places if(castText.midRef(pos-9, 9).startsWith(data.episodes)) { // find next cast list pos = castText.indexOf(castEnd, pos+6, Qt::CaseInsensitive); } } } } } if(pos == -1) { // no cast list found myLog() << "no cast list found"; return; } // loop until closing table tag int endPos = castText.indexOf(QStringLiteral("</table"), pos, Qt::CaseInsensitive); castText = castText.mid(pos, endPos-pos+1); QStringList actorList, characterList; QRegularExpression tdActorRx(QStringLiteral("<td>.*?<a href=\"/name.+?\".*?>(.+?)</a"), QRegularExpression::DotMatchesEverythingOption); QRegularExpression tdCharRx(QStringLiteral("<td class=\"character\">(.+?)</td"), QRegularExpression::DotMatchesEverythingOption); QRegularExpressionMatchIterator i = tdActorRx.globalMatch(castText); while(i.hasNext()) { QRegularExpressionMatch match = i.next(); actorList += match.captured(1).simplified(); } i = tdCharRx.globalMatch(castText); while(i.hasNext()) { QRegularExpressionMatch match = i.next(); characterList += match.captured(1).remove(*s_tagRx).simplified(); } // sanity check while(characterList.length() > actorList.length()) { myDebug() << "Too many characters"; characterList.removeLast(); } while(characterList.length() < actorList.length()) { characterList += QString(); } QStringList cast; cast.reserve(actorList.size()); for(int i = 0; i < actorList.size(); ++i) { cast += actorList.at(i) + FieldFormat::columnDelimiterString() + characterList.at(i); if(cast.count() >= m_numCast) { break; } } if(cast.isEmpty()) { QRegExp tdRx(QStringLiteral("<td[^>]*>(.*)</td>"), Qt::CaseInsensitive); tdRx.setMinimal(true); QRegExp tdActorRx(QStringLiteral("<td\\s+[^>]*itemprop=\"actor\"[^>]*>(.*)</td>"), Qt::CaseInsensitive); tdActorRx.setMinimal(true); QRegExp tdCharRx(QStringLiteral("<td\\s+[^>]*class=\"character\"[^>]*>(.*)</td>"), Qt::CaseInsensitive); tdCharRx.setMinimal(true); pos = tdActorRx.indexIn(castText); while(pos > -1 && cast.count() < m_numCast) { QString actorText = tdActorRx.cap(1).remove(*s_tagRx).simplified(); const int pos2 = tdCharRx.indexIn(castText, pos+1); if(pos2 > -1) { cast += actorText + FieldFormat::columnDelimiterString() + tdCharRx.cap(1).remove(*s_tagRx).simplified(); } pos = tdActorRx.indexIn(castText, qMax(pos+1, pos2)); } } if(!cast.isEmpty()) { entry_->setField(QStringLiteral("cast"), cast.join(FieldFormat::rowDelimiterString())); } // also do other items from fullcredits page, like producer QStringList producers; pos = castPage.indexOf(data.producer, 0, Qt::CaseInsensitive); if(pos > -1) { int endPos = castText.indexOf(QStringLiteral("</table"), pos, Qt::CaseInsensitive); if(endPos == -1) { endPos = castText.length(); } const QString prodText = castPage.mid(pos, endPos-pos+1); QRegExp tdCharRx(QStringLiteral("<td\\s+[^>]*class=\"credit\"[^>]*>(.*)</td>")); tdCharRx.setMinimal(true); pos = s_anchorNameRx->indexIn(prodText); while(pos > -1) { const int pos2 = tdCharRx.indexIn(prodText, pos+1); const QString credit = tdCharRx.cap(1).trimmed(); if(pos2 > -1 && (credit.startsWith(QStringLiteral("producer")) || credit.startsWith(QStringLiteral("co-producer")) || credit.startsWith(QStringLiteral("associate producer")))) { producers += s_anchorNameRx->cap(2).trimmed(); } pos = s_anchorNameRx->indexIn(prodText, pos+1); } } if(!producers.isEmpty()) { entry_->setField(QStringLiteral("producer"), producers.join(FieldFormat::delimiterString())); } #if 0 myWarning() << "Remove debug from imdbfetcher.cpp"; QFile f2(QString::fromLatin1("/tmp/testimdbcast2.html")); if(f2.open(QIODevice::WriteOnly)) { QTextStream t(&f); t.setCodec("UTF-8"); t << producers.join(FieldFormat::delimiterString()); } f2.close(); #endif } void IMDBFetcher::doRating(const QString& str_, Tellico::Data::EntryPtr entry_) { if(!optionalFields().contains(QStringLiteral("imdb-rating"))) { return; } QRegExp divRx(QStringLiteral("<div class=\"ipl-rating-star[\\s\"]+>(.*)</div"), Qt::CaseInsensitive); divRx.setMinimal(true); if(divRx.indexIn(str_) > -1) { if(!entry_->collection()->hasField(QStringLiteral("imdb-rating"))) { Data::FieldPtr f(new Data::Field(QStringLiteral("imdb-rating"), i18n("IMDb Rating"), Data::Field::Rating)); f->setCategory(i18n("General")); f->setProperty(QStringLiteral("maximum"), QStringLiteral("10")); entry_->collection()->addField(f); } QString text = divRx.cap(0); text.remove(*s_tagRx); QRegExp ratingRx(QStringLiteral("\\s(\\d+.?\\d*)\\s")); if(ratingRx.indexIn(text) > -1) { bool ok; float value = ratingRx.cap(1).toFloat(&ok); if(!ok) { value = QLocale().toFloat(ratingRx.cap(1), &ok); } if(ok) { entry_->setField(QStringLiteral("imdb-rating"), QString::number(value)); } } } } void IMDBFetcher::doCover(const QString& str_, Tellico::Data::EntryPtr entry_, const QUrl& baseURL_) { QRegExp imgRx(QStringLiteral("<img\\s+[^>]*src\\s*=\\s*\"([^\"]*)\"[^>]*>"), Qt::CaseInsensitive); imgRx.setMinimal(true); QRegExp posterRx(QStringLiteral("<a\\s+[^>]*name\\s*=\\s*\"poster\"[^>]*>(.*)</a>"), Qt::CaseInsensitive); posterRx.setMinimal(true); const QString cover = QStringLiteral("cover"); int pos = posterRx.indexIn(str_); while(pos > -1) { if(posterRx.cap(1).contains(imgRx)) { QUrl u = QUrl(baseURL_).resolved(QUrl(imgRx.cap(1))); QString id = ImageFactory::addImage(u, true); if(!id.isEmpty()) { entry_->setField(cover, id); return; } } pos = posterRx.indexIn(str_, pos+posterRx.matchedLength()); } // <link rel='image_src' QRegExp linkRx(QStringLiteral("<link (.*)>"), Qt::CaseInsensitive); linkRx.setMinimal(true); const QString src = QStringLiteral("image_src"); pos = linkRx.indexIn(str_); while(pos > -1) { const QString tag = linkRx.cap(1); if(tag.contains(src, Qt::CaseInsensitive)) { QRegExp hrefRx(QStringLiteral("href=['\"](.*)['\"]"), Qt::CaseInsensitive); hrefRx.setMinimal(true); if(hrefRx.indexIn(tag) > -1) { QUrl u = QUrl(baseURL_).resolved(QUrl(hrefRx.cap(1))); // imdb uses amazon media image, where the img src "encodes" requests for image sizing and cropping // strip everything after the "@." and add UY64 to limit the max image dimension to 640 int n = u.url().indexOf(QStringLiteral("@.")); if(n > -1) { const QString newLink = u.url().left(n) + QStringLiteral("@.UY640.jpg"); const QString id = ImageFactory::addImage(QUrl(newLink), true); if(!id.isEmpty()) { entry_->setField(cover, id); return; } } const QString id = ImageFactory::addImage(u, true); if(!id.isEmpty()) { entry_->setField(cover, id); return; } } } pos = linkRx.indexIn(str_, pos+linkRx.matchedLength()); } // <img alt="poster" posterRx.setPattern(QStringLiteral("<img\\s+[^>]*alt\\s*=\\s*\"poster\"[^>]+src\\s*=\\s*\"([^\"]+)\"")); pos = posterRx.indexIn(str_); if(pos > -1) { QUrl u = QUrl(baseURL_).resolved(QUrl(posterRx.cap(1))); QString id = ImageFactory::addImage(u, true); if(!id.isEmpty()) { entry_->setField(cover, id); return; } } // didn't find the cover, IMDb also used to put "cover" inside the url // cover is the img with the "cover" alt text pos = imgRx.indexIn(str_); while(pos > -1) { const QString url = imgRx.cap(0).toLower(); if(url.contains(cover)) { QUrl u = QUrl(baseURL_).resolved(QUrl(imgRx.cap(1))); QString id = ImageFactory::addImage(u, true); if(!id.isEmpty()) { entry_->setField(cover, id); return; } } pos = imgRx.indexIn(str_, pos+imgRx.matchedLength()); } } void IMDBFetcher::doLists2(const QString& str_, Tellico::Data::EntryPtr entry_) { QRegExp divInfoRx(QStringLiteral("<div class=\"info\">(.*)</div"), Qt::CaseInsensitive); divInfoRx.setMinimal(true); const LangData& data = langData(m_lang); QStringList genres, countries, langs, certs, tracks; for(int pos = divInfoRx.indexIn(str_); pos > -1; pos = divInfoRx.indexIn(str_, pos+divInfoRx.matchedLength())) { const QString text = divInfoRx.cap(1).remove(*s_tagRx); const QString tag = text.section(QLatin1Char(':'), 0, 0).simplified(); QString value = text.section(QLatin1Char(':'), 1, -1).simplified(); if(tag == data.genre) { foreach(const QString& token, value.split(QLatin1Char('|'))) { genres << token.trimmed(); } } else if(tag == data.language) { foreach(const QString& token, value.split(QRegExp(QLatin1String("[,|]")))) { langs << token.trimmed(); } } else if(tag == data.sound) { foreach(const QString& token, value.split(QLatin1Char('|'))) { tracks << token.trimmed(); } } else if(tag == data.country) { countries << value; } else if(tag == data.certification) { foreach(const QString& token, value.split(QLatin1Char('|'))) { certs << token.trimmed(); } } else if(tag == data.color) { // cut off any parentheses value = value.section(QLatin1Char('('), 0, 0).trimmed(); // change "black and white" to "black & white" value.replace(QStringLiteral("and"), QStringLiteral("&")); if(value == data.color) { entry_->setField(QStringLiteral("color"), i18n("Color")); } else { entry_->setField(QStringLiteral("color"), value); } } } entry_->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); entry_->setField(QStringLiteral("nationality"), countries.join(FieldFormat::delimiterString())); entry_->setField(QStringLiteral("language"), langs.join(FieldFormat::delimiterString())); entry_->setField(QStringLiteral("audio-track"), tracks.join(FieldFormat::delimiterString())); if(!certs.isEmpty()) { // first try to set default certification const QStringList& certsAllowed = entry_->collection()->fieldByName(QStringLiteral("certification"))->allowed(); foreach(const QString& cert, certs) { QString country = cert.section(QLatin1Char(':'), 0, 0); QString lcert = cert.section(QLatin1Char(':'), 1, 1); if(lcert == QStringLiteral("Unrated")) { lcert = QLatin1Char('U'); } lcert += QStringLiteral(" (") + country + QLatin1Char(')'); if(certsAllowed.contains(lcert)) { entry_->setField(QStringLiteral("certification"), lcert); break; } } // now add new field for all certifications const QString allc = QStringLiteral("allcertification"); if(optionalFields().contains(allc)) { Data::FieldPtr f = entry_->collection()->fieldByName(allc); if(!f) { f = new Data::Field(allc, i18n("Certifications"), Data::Field::Table); f->setFlags(Data::Field::AllowGrouped); entry_->collection()->addField(f); } entry_->setField(QStringLiteral("allcertification"), certs.join(FieldFormat::rowDelimiterString())); } } } // look at every anchor tag in the string void IMDBFetcher::doLists(const QString& str_, Tellico::Data::EntryPtr entry_) { const QString genre = QStringLiteral("/Genres/"); const QString genre2 = QStringLiteral("/genre/"); const QString country = QStringLiteral("/country/"); const QString lang = QStringLiteral("/language/"); const QString colorInfo = QStringLiteral("colors="); const QString cert = QStringLiteral("certificates="); const QString soundMix = QStringLiteral("sound_mixes="); const QString year = QStringLiteral("/Years/"); // if we reach faqs or user comments, we can stop const QString faqs = QStringLiteral("/faq"); const QString users = QStringLiteral("/user/"); // IMdb also has links with the word "sections" in them, remove that // for genres and nationalities int startPos = str_.indexOf(QStringLiteral("<div id=\"pagecontent\">")); if(startPos == -1) { startPos = 0; } QStringList genres, countries, langs, certs, tracks; for(int pos = s_anchorRx->indexIn(str_, startPos); pos > -1; pos = s_anchorRx->indexIn(str_, pos+s_anchorRx->matchedLength())) { const QString cap1 = s_anchorRx->cap(1); if(cap1.contains(genre) || cap1.contains(genre2)) { const QString g = s_anchorRx->cap(2); if(!g.contains(QStringLiteral(" section"), Qt::CaseInsensitive) && !g.contains(QStringLiteral(" genre"), Qt::CaseInsensitive)) { // ignore "Most Popular by Genre" genres += g.trimmed(); } } else if(cap1.contains(country)) { if(!s_anchorRx->cap(2).contains(QStringLiteral(" section"), Qt::CaseInsensitive)) { countries += s_anchorRx->cap(2).trimmed(); } } else if(cap1.contains(lang) && !cap1.contains(QStringLiteral("contribute"))) { langs += s_anchorRx->cap(2).trimmed(); } else if(cap1.contains(colorInfo)) { QString value = s_anchorRx->cap(2); // cut off any parentheses value = value.section(QLatin1Char('('), 0, 0).trimmed(); // change "black and white" to "black & white" value.replace(QStringLiteral("and"), QStringLiteral("&")); entry_->setField(QStringLiteral("color"), value.trimmed()); } else if(cap1.contains(cert)) { certs += s_anchorRx->cap(2).trimmed(); } else if(cap1.contains(soundMix)) { tracks += s_anchorRx->cap(2).trimmed(); // if year field wasn't set before, do it now } else if(entry_->field(QStringLiteral("year")).isEmpty() && cap1.contains(year)) { entry_->setField(QStringLiteral("year"), s_anchorRx->cap(2).trimmed()); } else if((cap1.contains(faqs) || cap1.contains(users)) && !genres.isEmpty()) { break; } } // since we have multiple genre search strings genres.removeDuplicates(); entry_->setField(QStringLiteral("genre"), genres.join(FieldFormat::delimiterString())); entry_->setField(QStringLiteral("nationality"), countries.join(FieldFormat::delimiterString())); entry_->setField(QStringLiteral("language"), langs.join(FieldFormat::delimiterString())); entry_->setField(QStringLiteral("audio-track"), tracks.join(FieldFormat::delimiterString())); if(!certs.isEmpty()) { // first try to set default certification const QStringList& certsAllowed = entry_->collection()->fieldByName(QStringLiteral("certification"))->allowed(); foreach(const QString& cert, certs) { QString country = cert.section(QLatin1Char(':'), 0, 0); if(country == QStringLiteral("United States")) { country = QStringLiteral("USA"); } QString lcert = cert.section(QLatin1Char(':'), 1, 1); if(lcert == QStringLiteral("Unrated")) { lcert = QLatin1Char('U'); } lcert += QStringLiteral(" (") + country + QLatin1Char(')'); if(certsAllowed.contains(lcert)) { entry_->setField(QStringLiteral("certification"), lcert); break; } } // now add new field for all certifications const QString allc = QStringLiteral("allcertification"); if(optionalFields().contains(allc)) { Data::FieldPtr f = entry_->collection()->fieldByName(allc); if(!f) { f = new Data::Field(allc, i18n("Certifications"), Data::Field::Table); f->setFlags(Data::Field::AllowGrouped); entry_->collection()->addField(f); } entry_->setField(QStringLiteral("allcertification"), certs.join(FieldFormat::rowDelimiterString())); } } } Tellico::Fetch::FetchRequest IMDBFetcher::updateRequest(Data::EntryPtr entry_) { const QString t = entry_->field(QStringLiteral("title")); QUrl link = QUrl::fromUserInput(entry_->field(QStringLiteral("imdb"))); if(!link.isEmpty() && link.isValid()) { if(link.host() != m_host) { // myLog() << "switching hosts to " << m_host; link.setHost(m_host); } return FetchRequest(Fetch::Raw, link.url()); } // optimistically try searching for title and rely on Collection::sameEntry() to figure things out if(!t.isEmpty()) { return FetchRequest(Fetch::Title, t); } return FetchRequest(); } QString IMDBFetcher::defaultName() { return i18n("Internet Movie Database"); } QString IMDBFetcher::defaultIcon() { return favIcon("https://www.imdb.com"); } //static Tellico::StringHash IMDBFetcher::allOptionalFields() { StringHash hash; hash[QStringLiteral("imdb")] = i18n("IMDb Link"); hash[QStringLiteral("imdb-rating")] = i18n("IMDb Rating"); hash[QStringLiteral("alttitle")] = i18n("Alternative Titles"); hash[QStringLiteral("allcertification")] = i18n("Certifications"); hash[QStringLiteral("origtitle")] = i18n("Original Title"); return hash; } Tellico::Fetch::ConfigWidget* IMDBFetcher::configWidget(QWidget* parent_) const { return new IMDBFetcher::ConfigWidget(parent_, this); } IMDBFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const IMDBFetcher* fetcher_/*=0*/) : Fetch::ConfigWidget(parent_) { QGridLayout* l = new QGridLayout(optionsWidget()); l->setSpacing(4); l->setColumnStretch(1, 10); int row = -1; /* IMDB.fr and others now redirects to imdb.com QLabel* label = new QLabel(i18n("Country: "), optionsWidget()); l->addWidget(label, ++row, 0); m_langCombo = new GUI::ComboBox(optionsWidget()); m_langCombo->addItem(i18n("United States"), EN); m_langCombo->addItem(i18n("France"), FR); m_langCombo->addItem(i18n("Spain"), ES); m_langCombo->addItem(i18n("Germany"), DE); m_langCombo->addItem(i18n("Italy"), IT); m_langCombo->addItem(i18n("Portugal"), PT); connect(m_langCombo, SIGNAL(activated(int)), SLOT(slotSetModified())); connect(m_langCombo, SIGNAL(activated(int)), SLOT(slotSiteChanged())); l->addWidget(m_langCombo, row, 1); QString w = i18n("The Internet Movie Database provides data from several different localized sites. " "Choose the one you wish to use for this data source."); label->setWhatsThis(w); m_langCombo->setWhatsThis(w); label->setBuddy(m_langCombo); */ QLabel* label = new QLabel(i18n("&Maximum cast: "), optionsWidget()); l->addWidget(label, ++row, 0); m_numCast = new QSpinBox(optionsWidget()); m_numCast->setMaximum(99); m_numCast->setMinimum(0); m_numCast->setValue(10); #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0)) void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::valueChanged; #else void (QSpinBox::* textChanged)(const QString&) = &QSpinBox::textChanged; #endif connect(m_numCast, textChanged, this, &ConfigWidget::slotSetModified); l->addWidget(m_numCast, row, 1); QString w = i18n("The list of cast members may include many people. Set the maximum number returned from the search."); label->setWhatsThis(w); m_numCast->setWhatsThis(w); label->setBuddy(m_numCast); m_fetchImageCheck = new QCheckBox(i18n("Download cover &image"), optionsWidget()); connect(m_fetchImageCheck, &QAbstractButton::clicked, this, &ConfigWidget::slotSetModified); ++row; l->addWidget(m_fetchImageCheck, row, 0, 1, 2); w = i18n("The cover image may be downloaded as well. However, too many large images in the " "collection may degrade performance."); m_fetchImageCheck->setWhatsThis(w); l->setRowStretch(++row, 10); // now add additional fields widget addFieldsWidget(IMDBFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList()); KAcceleratorManager::manage(optionsWidget()); if(fetcher_) { // m_langCombo->setCurrentData(fetcher_->m_lang); m_numCast->setValue(fetcher_->m_numCast); m_fetchImageCheck->setChecked(fetcher_->m_fetchImages); } else { //defaults // m_langCombo->setCurrentData(EN); m_numCast->setValue(10); m_fetchImageCheck->setChecked(true); } } void IMDBFetcher::ConfigWidget::saveConfigHook(KConfigGroup& config_) { // int n = m_langCombo->currentData().toInt(); // config_.writeEntry("Lang", n); config_.writeEntry("Host", QString()); // clear old host entry config_.writeEntry("Max Cast", m_numCast->value()); config_.writeEntry("Fetch Images", m_fetchImageCheck->isChecked()); } QString IMDBFetcher::ConfigWidget::preferredName() const { // return IMDBFetcher::langData(m_langCombo->currentData().toInt()).siteTitle; return IMDBFetcher::langData(EN).siteTitle; } void IMDBFetcher::ConfigWidget::slotSiteChanged() { emit signalName(preferredName()); }