diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c090ee4..9cef39b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,200 +1,200 @@
cmake_minimum_required(VERSION 3.7.2)

# With CMP0048 set to NEW, project() manages the VERSION variables.
if(POLICY CMP0048)
    cmake_policy(SET CMP0048 NEW)
endif()

project(kbibtex VERSION 0.9.50 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 11)

set(QT_MIN_VERSION 5.9.0)
# Somewhat arbitrarily chosen version number ...
set(KF5_MIN_VERSION 5.51)

# Extra CMake Modules provide the KDE*/ECM* modules included below
find_package(ECM ${KF5_MIN_VERSION} REQUIRED NO_MODULE)

set(CMAKE_MODULE_PATH ${ECM_MODULE_PATH} ${ECM_KDE_MODULE_DIR} ${CMAKE_MODULE_PATH})

set(KDE_INSTALL_DIRS_NO_DEPRECATED TRUE)
include(KDEInstallDirs)
include(KDECompilerSettings NO_POLICY_SCOPE)
include(KDECMakeSettings)
include(ECMGenerateHeaders)
include(ECMInstallIcons)
include(ECMSetupVersion)
include(ECMAddAppIcon)
include(GenerateExportHeader)
include(ECMQtDeclareLoggingCategory)

# Generates kbibtex-version.h and KBibTeXConfigVersion.cmake in the build
# directory and defines the KBIBTEX_VERSION_* variables used further down.
# NOTE(review): KBIBTEX_VERSION_MAJOR is not set anywhere visible before this
# call, so SOVERSION presumably receives an empty value here -- confirm.
ecm_setup_version(
    PROJECT
    VARIABLE_PREFIX KBIBTEX
    SOVERSION ${KBIBTEX_VERSION_MAJOR}
    VERSION_HEADER "${CMAKE_BINARY_DIR}/kbibtex-version.h"
    PACKAGE_VERSION_FILE "${CMAKE_BINARY_DIR}/KBibTeXConfigVersion.cmake"
)

install(
    FILES ${CMAKE_BINARY_DIR}/kbibtex-version.h
    DESTINATION ${KDE_INSTALL_INCLUDEDIR}/KBibTeX
    # FIXME is "Devel" standard?
    COMPONENT Devel
)

if("${KBIBTEX_VERSION_PATCH}" STREQUAL "")
    # Patch level is not set for version numbers like "0.9",
    # so set the patch level manually to 0
    set(KBIBTEX_VERSION_PATCH 0)
endif()

if(KBIBTEX_VERSION_PATCH GREATER_EQUAL 50)
    # If the version number indicates a pre-release version such as
    # '0.7.90', i.e. a beta version for the major release 0.8,
    # increment release version from 0.7 to 0.8
    math(EXPR KBIBTEX_RELEASE_VERSION_MINOR "${KBIBTEX_VERSION_MINOR} + 1")
    set(KBIBTEX_RELEASE_VERSION ${KBIBTEX_VERSION_MAJOR}.${KBIBTEX_RELEASE_VERSION_MINOR})
else()
    set(KBIBTEX_RELEASE_VERSION ${KBIBTEX_VERSION_MAJOR}.${KBIBTEX_VERSION_MINOR})
endif()

option(
    UNITY_BUILD
-    "Compile multiple C++ files in one big, merged file (\"Unity build\")\nSee also http://t-fischer.dreamwidth.org/3054.html"
+    "Compile multiple C++ files in one big, merged file (\"Unity build\")\nSee also https://t-fischer.dreamwidth.org/3054.html"
)
if(UNITY_BUILD)
    message(STATUS "Unity build enabled")
else()
    message(STATUS "Unity build disabled (default), use option UNITY_BUILD to enable it")
endif()

find_package(
    Qt5 ${QT_MIN_VERSION}
    CONFIG
    COMPONENTS
    Core
    Widgets
    Network
    XmlPatterns
    Concurrent
    NetworkAuth
    OPTIONAL_COMPONENTS
    WebEngineWidgets
    WebKitWidgets
    Test
)

add_definitions(-DHAVE_QTWIDGETS)

find_package(
    KF5 ${KF5_MIN_VERSION}
    MODULE
    REQUIRED
    I18n
    XmlGui
    KIO
    IconThemes
    Parts
    CoreAddons
    Service
    Wallet
    Crash
    DocTools
    TextEditor
)

add_definitions(-DHAVE_KF5)

find_package(Poppler MODULE REQUIRED Qt5)

find_package(ICU MODULE OPTIONAL_COMPONENTS uc i18n)
if(ICU_FOUND)
    add_definitions(-DHAVE_ICU)
endif()

option(BUILD_TESTING "Build automated and interactive tests" OFF)

if(MSVC)
    message(STATUS "Disabling building tests when using Microsoft Visual Studio C++ compiler")
    # Note to any developer: Try to enable building tests and see which issues you may encounter.
    # Examples may include: (1) char* texts which exceed the size limit supported by MSVC which
    # is about 2^16 bytes and (2) characters in strings written in \uXXXX notation not supported
    # in 1252 encoding as assumed by MSVC for C++ source files.
    set(BUILD_TESTING OFF)
endif()

if(NOT BUILD_TESTING AND Qt5Test_FOUND)
    message(STATUS "Testing is disabled, but can be enabled as the Qt5::Test library is available")
endif()
if(BUILD_TESTING AND NOT Qt5Test_FOUND)
    message(STATUS "Disabling building tests as Qt5::Test library is not available")
    set(BUILD_TESTING OFF)
endif()

if(BUILD_TESTING)
    if(WRITE_RAWDATAFILE)
        add_definitions(-DWRITE_RAWDATAFILE)
    endif()

    set(
        TESTSET_DIRECTORY ""
        CACHE PATH
        "Directory where the local checkout of Git repository 'kbibtex-testset' is located"
    )
endif()

add_subdirectory(config)
add_subdirectory(src)
add_subdirectory(xslt)
add_subdirectory(mime)
# The handbook is only built if KF5's DocTools component was found
if(KF5DocTools_FOUND)
    add_subdirectory(doc)
endif()
# macro_optional_add_subdirectory(
#     po
# )

# Install location of the logging categories file depends on the ECM version
if(ECM_VERSION VERSION_GREATER_EQUAL "5.59.0")
    install(FILES kbibtex.categories DESTINATION ${KDE_INSTALL_LOGGINGCATEGORIESDIR})
else()
    install(FILES kbibtex.categories DESTINATION ${KDE_INSTALL_CONFDIR})
endif()

feature_summary(WHAT ALL INCLUDE_QUIET_PACKAGES FATAL_ON_MISSING_REQUIRED_PACKAGES)
diff --git a/src/getgit.cmake b/src/getgit.cmake
index cdf93450..8c1b28c0 100644
--- a/src/getgit.cmake
+++ b/src/getgit.cmake
@@ -1,171 +1,171 @@
#############################################################################
#   Copyright (C) 2004-2019 by Thomas Fischer                               #
#                                                                           #
#   This program is free software; you can redistribute it and/or modify    #
#   it under the terms of the GNU General Public License as published by    #
#   the Free Software Foundation; either version 2 of the License, or       #
#   (at your option) any later version.                                     #
#                                                                           #
#   This program is distributed in the hope that it will be useful,         #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of          #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            #
#   GNU General Public License for more details.                            #
#                                                                           #
#   You should have received a copy of the GNU General Public License       #
#   along with this program; if not, see <https://www.gnu.org/licenses/>.
#
#############################################################################

# Inspired by:
-# http://stackoverflow.com/questions/3780667/use-cmake-to-get-build-time-svn-revision
+# https://stackoverflow.com/questions/3780667/use-cmake-to-get-build-time-svn-revision

# Script that writes "${BINARY_DIR}/kbibtex-git-info.h" with the Git revision,
# branch and commit count of the sources in "${SOURCE_DIR}".
# Both SOURCE_DIR and BINARY_DIR are read but never set here, so the caller
# has to provide them.

# Git information handed in through the environment takes precedence.
# The emptiness test has to look at the environment values ($ENV{...}):
# the CMake variables GIT_REV/GIT_BRANCH are only assigned further down.
if(DEFINED ENV{GIT_REV} AND DEFINED ENV{GIT_BRANCH} AND NOT ("$ENV{GIT_REV}" STREQUAL "" OR "$ENV{GIT_BRANCH}" STREQUAL ""))
    message(STATUS "Git information set by environment variables GIT_REV and GIT_BRANCH")
    set(GIT_REV "$ENV{GIT_REV}")
    set(GIT_BRANCH "$ENV{GIT_BRANCH}")
    set(GIT_COMMIT_COUNT "0")
else()
    set(GIT_REV "")
    set(GIT_BRANCH "")
    set(GIT_COMMIT_COUNT "0")

    if(EXISTS "${SOURCE_DIR}/.git")
        # Git
        find_program(GIT_EXECUTABLE NAMES git.bat git) # for Windows, "git.bat" must be found before "git"
        if(GIT_EXECUTABLE)
            execute_process(
                WORKING_DIRECTORY "${SOURCE_DIR}"
                COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
                OUTPUT_VARIABLE GIT_REV
                OUTPUT_STRIP_TRAILING_WHITESPACE
            )
            execute_process(
                WORKING_DIRECTORY "${SOURCE_DIR}"
                COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
                OUTPUT_VARIABLE GIT_BRANCH
                OUTPUT_STRIP_TRAILING_WHITESPACE
            )
            execute_process(
                WORKING_DIRECTORY "${SOURCE_DIR}"
                COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
                OUTPUT_VARIABLE GIT_COMMIT_COUNT
                OUTPUT_STRIP_TRAILING_WHITESPACE
            )
        else()
            message("No Git executable")
        endif()
    else()
        message("Not a Git source directory")
    endif()
endif()

# An empty count would produce an empty '#define KBIBTEX_GIT_COMMIT_COUNT' below
if("${GIT_COMMIT_COUNT}" STREQUAL "")
    set(GIT_COMMIT_COUNT "0")
endif()

# Write the header to a temporary file first; it only replaces the real
# header at the very end if its content differs.
file(
    WRITE "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "/// This file has been automatically generated by 'getgit.cmake'.\n/// Do not edit or modify it.\n\n"
)
file(
    APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "#ifndef KBIBTEX_GIT_INFO_H\n"
)
file(
    APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "#define KBIBTEX_GIT_INFO_H\n"
)

if("${GIT_REV}" STREQUAL "" OR "${GIT_BRANCH}" STREQUAL "")
    # Revision and branch are only meaningful together; reset everything
    set(GIT_REV "")
    set(GIT_BRANCH "")
    set(GIT_COMMIT_COUNT "0")
    message(STATUS "Source does not come from a Git checkout or determining the Git revision or branch failed")
    file(
        APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
        "/// This source code does not come from a Git checkout or\n/// determining the Git revision or branch failed.\n/// Please consider setting environment variables GIT_REV and\n/// GIT_BRANCH before running the build tool (make, ninja, ...).\n\n"
    )
else()
    message(STATUS "Git revision is " ${GIT_REV} "\nGit branch is " ${GIT_BRANCH} "\nGit commit count is " ${GIT_COMMIT_COUNT})
endif()

file(
    APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "#define KBIBTEX_GIT_REV_STRING \"${GIT_REV}\"\n"
)
file(
    APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "#define KBIBTEX_GIT_BRANCH_STRING \"${GIT_BRANCH}\"\n"
)
file(
    APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "#define KBIBTEX_GIT_COMMIT_COUNT ${GIT_COMMIT_COUNT}\n"
)

if("${GIT_REV}" STREQUAL "" OR "${GIT_BRANCH}" STREQUAL "")
    file(
        APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
        "#define KBIBTEX_GIT_INFO_STRING \"\"\n"
    )
else()
    file(
        APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
        "#define KBIBTEX_GIT_INFO_STRING \"${GIT_REV} (${GIT_BRANCH}, ${GIT_COMMIT_COUNT} commits in history)\"\n"
    )
endif()

file(
    APPEND "${BINARY_DIR}/kbibtex-git-info.h.tmp"
    "#endif // KBIBTEX_GIT_INFO_H\n"
)

if(EXISTS "${BINARY_DIR}/kbibtex-git-info.h.tmp")
    # copy_if_different leaves an unchanged header (and its timestamp) untouched
    execute_process(
        COMMAND ${CMAKE_COMMAND} -E copy_if_different "kbibtex-git-info.h.tmp" "kbibtex-git-info.h"
        WORKING_DIRECTORY "${BINARY_DIR}"
    )
    execute_process(
        COMMAND ${CMAKE_COMMAND} -E remove "kbibtex-git-info.h.tmp"
        WORKING_DIRECTORY "${BINARY_DIR}"
    )
else()
    message(STATUS "${BINARY_DIR}/kbibtex-git-info.h.tmp does not exist")
endif()
diff --git a/src/gui/delayedexecutiontimer.cpp b/src/gui/delayedexecutiontimer.cpp
index b084c9f7..239174ab 100644
--- a/src/gui/delayedexecutiontimer.cpp
+++ b/src/gui/delayedexecutiontimer.cpp
@@ -1,77 +1,77 @@
// krazy:excludeall=copyright,license
/*
Copyright (c) 2011, Andre Somers
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Rathenau Instituut, Andre Somers nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANDRE SOMERS AND/OR RATHENAU INSTITUTE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* -Found at http://qt-project.org/wiki/Delay_action_to_wait_for_user_interaction +Found at https://wiki.qt.io/Delay_action_to_wait_for_user_interaction modified/simplified by Thomas Fischer */ #include "delayedexecutiontimer.h" #include DelayedExecutionTimer::DelayedExecutionTimer(int maximumDelay, int minimumDelay, QObject *parent): QObject(parent), m_isEnabled(true), m_minimumDelay(minimumDelay), m_maximumDelay(maximumDelay), m_minimumTimer(new QTimer(this)), m_maximumTimer(new QTimer(this)) { connect(m_minimumTimer, &QTimer::timeout, this, &DelayedExecutionTimer::timeout); connect(m_maximumTimer, &QTimer::timeout, this, &DelayedExecutionTimer::timeout); } DelayedExecutionTimer::DelayedExecutionTimer(QObject *parent): QObject(parent), m_isEnabled(true), m_minimumDelay(250), m_maximumDelay(1000), m_minimumTimer(new QTimer(this)), m_maximumTimer(new QTimer(this)) { connect(m_minimumTimer, &QTimer::timeout, this, &DelayedExecutionTimer::timeout); connect(m_maximumTimer, &QTimer::timeout, this, &DelayedExecutionTimer::timeout); } void DelayedExecutionTimer::timeout() { m_minimumTimer->stop(); m_maximumTimer->stop(); emit triggered(); } void DelayedExecutionTimer::trigger() { if (!m_isEnabled) return; /// ignore trigger events if disabled if (!m_maximumTimer->isActive()) { m_maximumTimer->start(m_maximumDelay); } m_minimumTimer->stop(); m_minimumTimer->start(m_minimumDelay); } diff --git a/src/gui/delayedexecutiontimer.h b/src/gui/delayedexecutiontimer.h index 2763c2f7..0fad39ae 100644 --- a/src/gui/delayedexecutiontimer.h +++ b/src/gui/delayedexecutiontimer.h @@ -1,97 +1,97 @@ // krazy:excludeall=copyright,license /* Copyright (c) 2011, Andre Somers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the Rathenau Instituut, Andre Somers nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANDRE SOMERS AND/OR RATHENAU INSTITUTE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* -Found at http://qt-project.org/wiki/Delay_action_to_wait_for_user_interaction +Found at https://wiki.qt.io/Delay_action_to_wait_for_user_interaction modified/simplified by Thomas Fischer */ #ifndef DELAYEDEXECUTIONTIMER_H #define DELAYEDEXECUTIONTIMER_H #include class QTimer; /** Class to delay execution an action in response to events that may come in bursts */ class DelayedExecutionTimer : public QObject { Q_OBJECT public: explicit DelayedExecutionTimer(int maximumDelay = 1000, int minimumDelay = 250, QObject *parent = nullptr); explicit DelayedExecutionTimer(QObject *parent); /** The minimum delay is the time the class will wait after being triggered before emitting the triggered() signals. 
*/ void setMinimumDelay(int delay) { m_minimumDelay = delay; } int minimumDelay() const { return m_minimumDelay; } /** The maximum delay is the maximum time that will pass before a call to the trigger() slot leads to a triggered() signal. */ void setMaximumDelay(int delay) { m_maximumDelay = delay; } int maximumDelay() const { return m_maximumDelay; } /** * Toggle if this timer is reacting on trigger signals. * This timer may still send out trigger events itself. * @param isEnabled timer will react if set to true */ void setEnabled(bool isEnabled) { m_isEnabled = isEnabled; } signals: void triggered(); public slots: void trigger(); private slots: void timeout(); private: bool m_isEnabled; int m_minimumDelay; int m_maximumDelay; QTimer *m_minimumTimer; QTimer *m_maximumTimer; }; #endif // DELAYEDEXECUTIONTIMER_H diff --git a/src/io/encoderlatex.cpp b/src/io/encoderlatex.cpp index 575d8ffd..75170269 100644 --- a/src/io/encoderlatex.cpp +++ b/src/io/encoderlatex.cpp @@ -1,1463 +1,1463 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . 
* ***************************************************************************/ #include "encoderlatex.h" #include #include "logging_io.h" inline bool isAsciiLetter(const QChar c) { return (c.unicode() >= static_cast('A') && c.unicode() <= static_cast('Z')) || (c.unicode() >= static_cast('a') && c.unicode() <= static_cast('z')); } inline int asciiLetterOrDigitToPos(const QChar c) { static const ushort upperCaseLetterA = QLatin1Char('A').unicode(); static const ushort upperCaseLetterZ = QLatin1Char('Z').unicode(); static const ushort lowerCaseLetterA = QLatin1Char('a').unicode(); static const ushort lowerCaseLetterZ = QLatin1Char('z').unicode(); static const ushort digit0 = QLatin1Char('0').unicode(); static const ushort digit9 = QLatin1Char('9').unicode(); const ushort unicode = c.unicode(); if (unicode >= upperCaseLetterA && unicode <= upperCaseLetterZ) return unicode - upperCaseLetterA; else if (unicode >= lowerCaseLetterA && unicode <= lowerCaseLetterZ) return unicode + 26 - lowerCaseLetterA; else if (unicode >= digit0 && unicode <= digit9) return unicode + 52 - digit0; else return -1; } inline bool isIJ(const QChar c) { static const QChar upperCaseLetterI = QLatin1Char('I'); static const QChar upperCaseLetterJ = QLatin1Char('J'); static const QChar lowerCaseLetterI = QLatin1Char('i'); static const QChar lowerCaseLetterJ = QLatin1Char('j'); return c == upperCaseLetterI || c == upperCaseLetterJ || c == lowerCaseLetterI || c == lowerCaseLetterJ; } enum EncoderLaTeXCommandDirection { DirectionCommandToUnicode = 1, DirectionUnicodeToCommand = 2, DirectionBoth = DirectionCommandToUnicode | DirectionUnicodeToCommand }; /** * General documentation on this topic: - * http://www.tex.ac.uk/CTAN/macros/latex/doc/encguide.pdf + * https://www.latex-project.org/help/documentation/encguide.pdf * https://mirror.hmc.edu/ctan/macros/xetex/latex/xecjk/xunicode-symbols.pdf * ftp://ftp.dante.de/tex-archive/biblio/biber/documentation/utf8-macro-map.html */ /** * This structure 
contains information how escaped characters * such as \"a are translated to an Unicode character and back. * The structure is a table with three columns: (1) the modifier * (in the example before the quotation mark) (2) the ASCII * character ((in the example before the 'a') (3) the Unicode * character described by a hexcode. * This data structure is used both directly and indirectly via * the LookupTable structure which is initialized when the * EncoderLaTeX object is created. */ static const struct EncoderLaTeXEscapedCharacter { const QChar modifier; const QChar letter; const ushort unicode; const EncoderLaTeXCommandDirection direction; } encoderLaTeXEscapedCharacters[] = { {QLatin1Char('`'), QLatin1Char('A'), 0x00C0, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('A'), 0x00C1, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('A'), 0x00C2, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('A'), 0x00C3, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('A'), 0x00C4, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('A'), 0x00C5, DirectionBoth}, /** 0x00C6: see EncoderLaTeXCharacterCommand */ {QLatin1Char('c'), QLatin1Char('C'), 0x00C7, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('E'), 0x00C8, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('E'), 0x00C9, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('E'), 0x00CA, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('E'), 0x00CB, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('I'), 0x00CC, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('I'), 0x00CD, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('I'), 0x00CE, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('I'), 0x00CF, DirectionBoth}, /** 0x00D0: see EncoderLaTeXCharacterCommand */ {QLatin1Char('~'), QLatin1Char('N'), 0x00D1, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('O'), 0x00D2, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('O'), 0x00D3, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('O'), 0x00D4, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('O'), 0x00D5, 
DirectionBoth}, {QLatin1Char('"'), QLatin1Char('O'), 0x00D6, DirectionBoth}, /** 0x00D7: see EncoderLaTeXCharacterCommand */ /** 0x00D8: see EncoderLaTeXCharacterCommand */ {QLatin1Char('`'), QLatin1Char('U'), 0x00D9, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('U'), 0x00DA, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('U'), 0x00DB, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('U'), 0x00DC, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('Y'), 0x00DD, DirectionBoth}, /** 0x00DE: see EncoderLaTeXCharacterCommand */ {QLatin1Char('"'), QLatin1Char('s'), 0x00DF, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('a'), 0x00E0, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('a'), 0x00E1, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('a'), 0x00E2, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('a'), 0x00E3, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('a'), 0x00E4, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('a'), 0x00E5, DirectionBoth}, /** 0x00E6: see EncoderLaTeXCharacterCommand */ {QLatin1Char('c'), QLatin1Char('c'), 0x00E7, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('e'), 0x00E8, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('e'), 0x00E9, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('e'), 0x00EA, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('e'), 0x00EB, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('i'), 0x00EC, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('i'), 0x00ED, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('i'), 0x00EE, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('i'), 0x00EF, DirectionBoth}, /** 0x00F0: see EncoderLaTeXCharacterCommand */ {QLatin1Char('~'), QLatin1Char('n'), 0x00F1, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('o'), 0x00F2, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('o'), 0x00F3, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('o'), 0x00F4, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('o'), 0x00F5, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('o'), 0x00F6, DirectionBoth}, /** 0x00F7: see 
EncoderLaTeXCharacterCommand */ /** 0x00F8: see EncoderLaTeXCharacterCommand */ {QLatin1Char('`'), QLatin1Char('u'), 0x00F9, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('u'), 0x00FA, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('u'), 0x00FB, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('u'), 0x00FC, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('y'), 0x00FD, DirectionBoth}, /** 0x00FE: see EncoderLaTeXCharacterCommand */ {QLatin1Char('"'), QLatin1Char('y'), 0x00FF, DirectionBoth}, {QLatin1Char('='), QLatin1Char('A'), 0x0100, DirectionBoth}, {QLatin1Char('='), QLatin1Char('a'), 0x0101, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('A'), 0x0102, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('a'), 0x0103, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('A'), 0x0104, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('a'), 0x0105, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('C'), 0x0106, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('c'), 0x0107, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('C'), 0x0108, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('c'), 0x0109, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('C'), 0x010A, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('c'), 0x010B, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('C'), 0x010C, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('c'), 0x010D, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('D'), 0x010E, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('d'), 0x010F, DirectionBoth}, {QLatin1Char('B'), QLatin1Char('D'), 0x0110, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('d'), 0x0111, DirectionCommandToUnicode}, {QLatin1Char('='), QLatin1Char('E'), 0x0112, DirectionBoth}, {QLatin1Char('='), QLatin1Char('e'), 0x0113, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('E'), 0x0114, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('e'), 0x0115, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('E'), 0x0116, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('e'), 0x0117, DirectionBoth}, 
{QLatin1Char('k'), QLatin1Char('E'), 0x0118, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('e'), 0x0119, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('E'), 0x011A, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('e'), 0x011B, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('G'), 0x011C, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('g'), 0x011D, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('G'), 0x011E, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('g'), 0x011F, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('G'), 0x0120, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('g'), 0x0121, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('G'), 0x0122, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('g'), 0x0123, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('H'), 0x0124, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('h'), 0x0125, DirectionBoth}, {QLatin1Char('B'), QLatin1Char('H'), 0x0126, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('h'), 0x0127, DirectionCommandToUnicode}, {QLatin1Char('~'), QLatin1Char('I'), 0x0128, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('i'), 0x0129, DirectionBoth}, {QLatin1Char('='), QLatin1Char('I'), 0x012A, DirectionBoth}, {QLatin1Char('='), QLatin1Char('i'), 0x012B, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('I'), 0x012C, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('i'), 0x012D, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('I'), 0x012E, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('i'), 0x012F, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('I'), 0x0130, DirectionBoth}, /** 0x0131: see EncoderLaTeXCharacterCommand */ /** 0x0132: see EncoderLaTeXCharacterCommand */ /** 0x0133: see EncoderLaTeXCharacterCommand */ {QLatin1Char('^'), QLatin1Char('J'), 0x012E, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('j'), 0x012F, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('K'), 0x0136, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('k'), 0x0137, DirectionBoth}, /** 0x0138: see EncoderLaTeXCharacterCommand */ 
{QLatin1Char('\''), QLatin1Char('L'), 0x0139, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('l'), 0x013A, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('L'), 0x013B, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('l'), 0x013C, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('L'), 0x013D, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('l'), 0x013E, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('L'), 0x013F, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('l'), 0x0140, DirectionBoth}, {QLatin1Char('B'), QLatin1Char('L'), 0x0141, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('l'), 0x0142, DirectionCommandToUnicode}, {QLatin1Char('\''), QLatin1Char('N'), 0x0143, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('n'), 0x0144, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('n'), 0x0145, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('n'), 0x0146, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('N'), 0x0147, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('n'), 0x0148, DirectionBoth}, /** 0x0149: TODO n preceded by apostrophe */ {QLatin1Char('m'), QLatin1Char('N'), 0x014A, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('n'), 0x014B, DirectionCommandToUnicode}, {QLatin1Char('='), QLatin1Char('O'), 0x014C, DirectionBoth}, {QLatin1Char('='), QLatin1Char('o'), 0x014D, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('O'), 0x014E, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('o'), 0x014F, DirectionBoth}, {QLatin1Char('H'), QLatin1Char('O'), 0x0150, DirectionBoth}, {QLatin1Char('H'), QLatin1Char('o'), 0x0151, DirectionBoth}, /** 0x0152: see EncoderLaTeXCharacterCommand */ /** 0x0153: see EncoderLaTeXCharacterCommand */ {QLatin1Char('\''), QLatin1Char('R'), 0x0154, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('r'), 0x0155, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('R'), 0x0156, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('r'), 0x0157, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('R'), 0x0158, DirectionBoth}, {QLatin1Char('v'), 
QLatin1Char('r'), 0x0159, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('S'), 0x015A, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('s'), 0x015B, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('S'), 0x015C, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('s'), 0x015D, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('S'), 0x015E, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('s'), 0x015F, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('S'), 0x0160, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('s'), 0x0161, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('T'), 0x0162, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('t'), 0x0163, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('T'), 0x0164, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('t'), 0x0165, DirectionBoth}, {QLatin1Char('B'), QLatin1Char('T'), 0x0166, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('t'), 0x0167, DirectionCommandToUnicode}, {QLatin1Char('~'), QLatin1Char('U'), 0x0168, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('u'), 0x0169, DirectionBoth}, {QLatin1Char('='), QLatin1Char('U'), 0x016A, DirectionBoth}, {QLatin1Char('='), QLatin1Char('u'), 0x016B, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('U'), 0x016C, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('u'), 0x016D, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('U'), 0x016E, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('u'), 0x016F, DirectionBoth}, {QLatin1Char('H'), QLatin1Char('U'), 0x0170, DirectionBoth}, {QLatin1Char('H'), QLatin1Char('u'), 0x0171, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('U'), 0x0172, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('u'), 0x0173, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('W'), 0x0174, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('w'), 0x0175, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('Y'), 0x0176, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('y'), 0x0177, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('Y'), 0x0178, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('Z'), 
0x0179, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('z'), 0x017A, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('Z'), 0x017B, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('z'), 0x017C, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('Z'), 0x017D, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('z'), 0x017E, DirectionBoth}, /** 0x017F: TODO long s */ {QLatin1Char('B'), QLatin1Char('b'), 0x0180, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('B'), 0x0181, DirectionCommandToUnicode}, /** 0x0182 */ /** 0x0183 */ /** 0x0184 */ /** 0x0185 */ {QLatin1Char('m'), QLatin1Char('O'), 0x0186, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('C'), 0x0187, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('c'), 0x0188, DirectionCommandToUnicode}, {QLatin1Char('M'), QLatin1Char('D'), 0x0189, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('D'), 0x018A, DirectionCommandToUnicode}, /** 0x018B */ /** 0x018C */ /** 0x018D */ {QLatin1Char('M'), QLatin1Char('E'), 0x018E, DirectionCommandToUnicode}, /** 0x018F */ {QLatin1Char('m'), QLatin1Char('E'), 0x0190, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('F'), 0x0191, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('f'), 0x0192, DirectionCommandToUnicode}, /** 0x0193 */ {QLatin1Char('m'), QLatin1Char('G'), 0x0194, DirectionCommandToUnicode}, /** 0x0195: see EncoderLaTeXCharacterCommand */ {QLatin1Char('m'), QLatin1Char('I'), 0x0196, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('I'), 0x0197, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('K'), 0x0198, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('k'), 0x0199, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('l'), 0x019A, DirectionCommandToUnicode}, /** 0x019B */ /** 0x019C */ {QLatin1Char('m'), QLatin1Char('J'), 0x019D, DirectionCommandToUnicode}, /** 0x019E */ /** 0x019F */ /** 0x01A0 */ /** 0x01A1 */ /** 0x01A2 */ /** 0x01A3 */ {QLatin1Char('m'), QLatin1Char('P'), 
0x01A4, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('p'), 0x01A5, DirectionCommandToUnicode}, /** 0x01A6 */ /** 0x01A7 */ /** 0x01A8 */ /** 0x01A9: see EncoderLaTeXCharacterCommand */ /** 0x01AA */ /** 0x01AB */ {QLatin1Char('m'), QLatin1Char('T'), 0x01AC, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('t'), 0x01AD, DirectionCommandToUnicode}, {QLatin1Char('M'), QLatin1Char('T'), 0x01AE, DirectionCommandToUnicode}, /** 0x01AF */ /** 0x01B0 */ {QLatin1Char('m'), QLatin1Char('U'), 0x01B1, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('V'), 0x01B2, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('Y'), 0x01B3, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('y'), 0x01B4, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('Z'), 0x01B5, DirectionCommandToUnicode}, {QLatin1Char('B'), QLatin1Char('z'), 0x01B6, DirectionCommandToUnicode}, {QLatin1Char('m'), QLatin1Char('Z'), 0x01B7, DirectionCommandToUnicode}, /** 0x01B8 */ /** 0x01B9 */ /** 0x01BA */ {QLatin1Char('B'), QLatin1Char('2'), 0x01BB, DirectionCommandToUnicode}, /** 0x01BC */ /** 0x01BD */ /** 0x01BE */ /** 0x01BF */ /** 0x01C0 */ /** 0x01C1 */ /** 0x01C2 */ /** 0x01C3 */ /** 0x01C4 */ /** 0x01C5 */ /** 0x01C6 */ /** 0x01C7 */ /** 0x01C8 */ /** 0x01C9 */ /** 0x01CA */ /** 0x01CB */ /** 0x01CC */ {QLatin1Char('v'), QLatin1Char('A'), 0x01CD, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('a'), 0x01CE, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('G'), 0x01E6, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('g'), 0x01E7, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('O'), 0x01EA, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('o'), 0x01EB, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('F'), 0x01F4, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('f'), 0x01F5, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('A'), 0x0226, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('a'), 0x0227, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('E'), 0x0228, 
DirectionBoth}, {QLatin1Char('c'), QLatin1Char('e'), 0x0229, DirectionBoth}, {QLatin1Char('='), QLatin1Char('Y'), 0x0232, DirectionBoth}, {QLatin1Char('='), QLatin1Char('y'), 0x0233, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('O'), 0x022E, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('o'), 0x022F, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('B'), 0x1E02, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('b'), 0x1E03, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('B'), 0x1E04, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('b'), 0x1E05, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('D'), 0x1E0A, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('d'), 0x1E0B, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('D'), 0x1E0C, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('d'), 0x1E0D, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('D'), 0x1E10, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('d'), 0x1E11, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('E'), 0x1E1E, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('e'), 0x1E1F, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('H'), 0x1E22, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('h'), 0x1E23, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('H'), 0x1E24, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('h'), 0x1E25, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('H'), 0x1E26, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('h'), 0x1E27, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('H'), 0x1E28, DirectionBoth}, {QLatin1Char('c'), QLatin1Char('h'), 0x1E29, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('K'), 0x1E32, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('k'), 0x1E33, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('L'), 0x1E36, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('l'), 0x1E37, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('M'), 0x1E40, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('m'), 0x1E41, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('M'), 0x1E42, DirectionBoth}, {QLatin1Char('d'), 
QLatin1Char('m'), 0x1E43, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('N'), 0x1E44, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('n'), 0x1E45, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('N'), 0x1E46, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('n'), 0x1E47, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('P'), 0x1E56, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('p'), 0x1E57, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('R'), 0x1E58, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('r'), 0x1E59, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('R'), 0x1E5A, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('r'), 0x1E5B, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('S'), 0x1E60, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('s'), 0x1E61, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('S'), 0x1E62, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('s'), 0x1E63, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('T'), 0x1E6A, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('t'), 0x1E6B, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('T'), 0x1E6C, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('t'), 0x1E6D, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('V'), 0x1E7E, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('v'), 0x1E7F, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('W'), 0x1E80, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('w'), 0x1E81, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('W'), 0x1E82, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('w'), 0x1E83, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('W'), 0x1E84, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('w'), 0x1E85, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('W'), 0x1E86, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('w'), 0x1E87, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('W'), 0x1E88, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('w'), 0x1E88, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('X'), 0x1E8A, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('x'), 0x1E8B, DirectionBoth}, 
{QLatin1Char('"'), QLatin1Char('X'), 0x1E8C, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('x'), 0x1E8D, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('Y'), 0x1E8E, DirectionBoth}, {QLatin1Char('.'), QLatin1Char('y'), 0x1E8F, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('Z'), 0x1E92, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('z'), 0x1E93, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('t'), 0x1E97, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('w'), 0x1E98, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('y'), 0x1E99, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('A'), 0x1EA0, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('a'), 0x1EA1, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('E'), 0x1EB8, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('e'), 0x1EB9, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('I'), 0x1ECA, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('i'), 0x1ECB, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('O'), 0x1ECC, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('o'), 0x1ECD, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('U'), 0x1EE4, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('u'), 0x1EE5, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('Y'), 0x1EF2, DirectionBoth}, {QLatin1Char('`'), QLatin1Char('y'), 0x1EF3, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('Y'), 0x1EF4, DirectionBoth}, {QLatin1Char('d'), QLatin1Char('y'), 0x1EF5, DirectionBoth}, {QLatin1Char('r'), QLatin1Char('q'), 0x2019, DirectionCommandToUnicode} ///< tricky: this is \rq }; /** * This structure contains information on the usage of dotless i * and dotless j in combination with accent-like modifiers. * Combinations such as \"{\i} are translated to an Unicode character * and back. The structure is a table with three columns: (1) the * modified (in the example before the quotation mark) (2) the ASCII * character (in the example before the 'i') (3) the Unicode * character described by a hexcode. */ // TODO other cases of \i and \j? 
static const struct DotlessIJCharacter { const QChar modifier; const QChar letter; const ushort unicode; const EncoderLaTeXCommandDirection direction; } dotlessIJCharacters[] = { {QLatin1Char('`'), QLatin1Char('i'), 0x00EC, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('i'), 0x00ED, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('i'), 0x00EE, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('i'), 0x00EF, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('i'), 0x0129, DirectionBoth}, {QLatin1Char('='), QLatin1Char('i'), 0x012B, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('i'), 0x012D, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('i'), 0x012F, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('j'), 0x0135, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('i'), 0x01D0, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('j'), 0x01F0, DirectionBoth}, {QLatin1Char('G'), QLatin1Char('i'), 0x0209, DirectionCommandToUnicode} }; /** * This lookup allows to quickly find hits in the * EncoderLaTeXEscapedCharacter table. This data structure here * consists of a number of rows. Each row consists of a * modifier (like '"' or 'v') and an array of Unicode chars. * Letters 'A'..'Z','a'..'z','0'..'9' are used as index to this * array by invocing asciiLetterOrDigitToPos(). * This data structure is built in the constructor. */ static const int lookupTableNumModifiers = 32; static const int lookupTableNumCharacters = 26 * 2 + 10; static struct EncoderLaTeXEscapedCharacterLookupTableRow { QChar modifier; QChar unicode[lookupTableNumCharacters]; } *lookupTable[lookupTableNumModifiers]; /** * This data structure keeps track of math commands, which * have to be treated differently in text and math mode. * The math command like "subset of" could be used directly * in math mode, but must be enclosed in \ensuremath{...} * in text mode. 
*/
static const struct MathCommand {
    const QString command; ///< command name without the leading backslash
    const ushort unicode; ///< code point the command stands for
    const EncoderLaTeXCommandDirection direction; ///< direction(s) for which this row is meant
} mathCommands[] = {
    {QStringLiteral("pm"), 0x00B1, DirectionBoth},
    {QStringLiteral("times"), 0x00D7, DirectionBoth},
    {QStringLiteral("div"), 0x00F7, DirectionBoth},
    {QStringLiteral("phi"), 0x0278, DirectionBoth}, ///< see also 0x03C6 (GREEK SMALL LETTER PHI)
    {QStringLiteral("Alpha"), 0x0391, DirectionBoth},
    {QStringLiteral("Beta"), 0x0392, DirectionBoth},
    {QStringLiteral("Gamma"), 0x0393, DirectionBoth},
    {QStringLiteral("Delta"), 0x0394, DirectionBoth},
    {QStringLiteral("Epsilon"), 0x0395, DirectionBoth},
    {QStringLiteral("Zeta"), 0x0396, DirectionBoth},
    {QStringLiteral("Eta"), 0x0397, DirectionBoth},
    {QStringLiteral("Theta"), 0x0398, DirectionBoth},
    {QStringLiteral("Iota"), 0x0399, DirectionBoth},
    {QStringLiteral("Kappa"), 0x039A, DirectionBoth},
    {QStringLiteral("Lamda"), 0x039B, DirectionCommandToUnicode}, ///< \Lamda does not exist, this is mostly for spelling errors
    {QStringLiteral("Lambda"), 0x039B, DirectionBoth},
    {QStringLiteral("Mu"), 0x039C, DirectionBoth},
    {QStringLiteral("Nu"), 0x039D, DirectionBoth},
    {QStringLiteral("Xi"), 0x039E, DirectionBoth},
    {QStringLiteral("Omicron"), 0x039F, DirectionBoth},
    {QStringLiteral("Pi"), 0x03A0, DirectionBoth},
    {QStringLiteral("Rho"), 0x03A1, DirectionBoth},
    {QStringLiteral("Sigma"), 0x03A3, DirectionBoth},
    {QStringLiteral("Tau"), 0x03A4, DirectionBoth},
    {QStringLiteral("Upsilon"), 0x03A5, DirectionBoth},
    {QStringLiteral("Phi"), 0x03A6, DirectionBoth},
    {QStringLiteral("Chi"), 0x03A7, DirectionBoth},
    {QStringLiteral("Psi"), 0x03A8, DirectionBoth},
    {QStringLiteral("Omega"), 0x03A9, DirectionBoth},
    {QStringLiteral("alpha"), 0x03B1, DirectionBoth},
    {QStringLiteral("beta"), 0x03B2, DirectionBoth},
    {QStringLiteral("gamma"), 0x03B3, DirectionBoth},
    {QStringLiteral("delta"), 0x03B4, DirectionBoth},
    {QStringLiteral("varepsilon"), 0x03B5, DirectionBoth},
    {QStringLiteral("zeta"), 0x03B6, DirectionBoth},
    {QStringLiteral("eta"), 0x03B7, DirectionBoth},
    {QStringLiteral("theta"), 0x03B8, DirectionBoth},
    {QStringLiteral("iota"), 0x03B9, DirectionBoth},
    {QStringLiteral("kappa"), 0x03BA, DirectionBoth},
    /// \lamda does not exist, this is mostly for spelling errors; it is
    /// accepted when reading only, exactly like \Lamda above, so that this
    /// row can never be picked when turning U+03BB back into a command
    {QStringLiteral("lamda"), 0x03BB, DirectionCommandToUnicode},
    {QStringLiteral("lambda"), 0x03BB, DirectionBoth},
    {QStringLiteral("mu"), 0x03BC, DirectionBoth},
    {QStringLiteral("nu"), 0x03BD, DirectionBoth},
    {QStringLiteral("xi"), 0x03BE, DirectionBoth},
    {QStringLiteral("omicron"), 0x03BF, DirectionBoth},
    {QStringLiteral("pi"), 0x03C0, DirectionBoth},
    {QStringLiteral("rho"), 0x03C1, DirectionBoth},
    {QStringLiteral("varsigma"), 0x03C2, DirectionBoth},
    {QStringLiteral("sigma"), 0x03C3, DirectionBoth},
    {QStringLiteral("tau"), 0x03C4, DirectionBoth},
    {QStringLiteral("upsilon"), 0x03C5, DirectionBoth},
    {QStringLiteral("varphi"), 0x03C6, DirectionBoth}, ///< see also 0x0278 (LATIN SMALL LETTER PHI)
    {QStringLiteral("chi"), 0x03C7, DirectionBoth},
    {QStringLiteral("psi"), 0x03C8, DirectionBoth},
    {QStringLiteral("omega"), 0x03C9, DirectionBoth},
    {QStringLiteral("vartheta"), 0x03D1, DirectionBoth},
    {QStringLiteral("varpi"), 0x03D6, DirectionBoth},
    {QStringLiteral("digamma"), 0x03DC, DirectionBoth},
    {QStringLiteral("varkappa"), 0x03F0, DirectionBoth},
    {QStringLiteral("varrho"), 0x03F1, DirectionBoth},
    {QStringLiteral("epsilon"), 0x03F5, DirectionBoth},
    {QStringLiteral("backepsilon"), 0x03F6, DirectionBoth},
    {QStringLiteral("aleph"), 0x05D0, DirectionBoth},
    {QStringLiteral("dagger"), 0x2020, DirectionBoth},
    {QStringLiteral("ddagger"), 0x2021, DirectionBoth},
    {QStringLiteral("mathbb{C}"), 0x2102, DirectionBoth},
    {QStringLiteral("ell"), 0x2113, DirectionBoth},
    {QStringLiteral("mho"), 0x2127, DirectionBoth},
    {QStringLiteral("beth"), 0x2136, DirectionBoth},
    {QStringLiteral("gimel"), 0x2137, DirectionBoth},
    {QStringLiteral("daleth"), 0x2138, DirectionBoth},
    {QStringLiteral("rightarrow"), 0x2192, DirectionBoth},
    {QStringLiteral("forall"), 0x2200, DirectionBoth},
    {QStringLiteral("complement"), 0x2201, DirectionBoth},
    {QStringLiteral("partial"), 0x2202, DirectionBoth},
    {QStringLiteral("exists"), 0x2203, DirectionBoth},
    {QStringLiteral("nexists"), 0x2204, DirectionBoth},
    {QStringLiteral("varnothing"), 0x2205, DirectionBoth},
    {QStringLiteral("nabla"), 0x2207, DirectionBoth},
    {QStringLiteral("in"), 0x2208, DirectionBoth},
    {QStringLiteral("notin"), 0x2209, DirectionBoth},
    {QStringLiteral("ni"), 0x220B, DirectionBoth},
    {QStringLiteral("not\\ni"), 0x220C, DirectionBoth},
    {QStringLiteral("asterisk"), 0x2217, DirectionCommandToUnicode},
    {QStringLiteral("infty"), 0x221E, DirectionBoth},
    {QStringLiteral("leq"), 0x2264, DirectionBoth},
    {QStringLiteral("geq"), 0x2265, DirectionBoth},
    {QStringLiteral("lneq"), 0x2268, DirectionBoth},
    {QStringLiteral("gneq"), 0x2269, DirectionBoth},
    {QStringLiteral("ll"), 0x226A, DirectionBoth},
    {QStringLiteral("gg"), 0x226B, DirectionBoth},
    {QStringLiteral("nless"), 0x226E, DirectionBoth},
    {QStringLiteral("ngtr"), 0x226F, DirectionBoth},
    {QStringLiteral("nleq"), 0x2270, DirectionBoth},
    {QStringLiteral("ngeq"), 0x2271, DirectionBoth},
    {QStringLiteral("subset"), 0x2282, DirectionBoth},
    {QStringLiteral("supset"), 0x2283, DirectionBoth},
    {QStringLiteral("subseteq"), 0x2286, DirectionBoth},
    {QStringLiteral("supseteq"), 0x2287, DirectionBoth},
    {QStringLiteral("nsubseteq"), 0x2288, DirectionBoth},
    {QStringLiteral("nsupseteq"), 0x2289, DirectionBoth},
    {QStringLiteral("subsetneq"), 0x228A, DirectionBoth}, ///< SUBSET OF WITH NOT EQUAL TO
    {QStringLiteral("supsetneq"), 0x228B, DirectionBoth}, ///< SUPERSET OF WITH NOT EQUAL TO, must differ from \subsetneq
    {QStringLiteral("Subset"), 0x22D0, DirectionBoth},
    {QStringLiteral("Supset"), 0x22D1, DirectionBoth},
    {QStringLiteral("lll"), 0x22D8, DirectionBoth},
    {QStringLiteral("ggg"), 0x22D9, DirectionBoth},
    {QStringLiteral("top"), 0x22A4, DirectionBoth},
    {QStringLiteral("bot"), 0x22A5, DirectionBoth},
};

/**
 * This data structure holds commands representing a single
 * character.
For example, it maps \AA to A with a ring (Nordic * letter) and back. The structure is a table with two columns: * (1) the command's name without a backslash (in the example * before the 'AA') (2) the Unicode character described by a * hexcode. */ static const struct EncoderLaTeXCharacterCommand { const QString command; const ushort unicode; const EncoderLaTeXCommandDirection direction; } encoderLaTeXCharacterCommands[] = { {QStringLiteral("textexclamdown"), 0x00A1, DirectionCommandToUnicode}, {QStringLiteral("textcent"), 0x00A2, DirectionBoth}, {QStringLiteral("pounds"), 0x00A3, DirectionBoth}, {QStringLiteral("textsterling"), 0x00A3, DirectionBoth}, /** 0x00A4 */ {QStringLiteral("textyen"), 0x00A5, DirectionBoth}, {QStringLiteral("textbrokenbar"), 0x00A6, DirectionBoth}, {QStringLiteral("S"), 0x00A7, DirectionBoth}, {QStringLiteral("textsection"), 0x00A7, DirectionBoth}, /** 0x00A8 */ {QStringLiteral("copyright"), 0x00A9, DirectionBoth}, {QStringLiteral("textcopyright"), 0x00A9, DirectionBoth}, {QStringLiteral("textordfeminine"), 0x00AA, DirectionBoth}, {QStringLiteral("guillemotleft"), 0x00AB, DirectionCommandToUnicode}, {QStringLiteral("textflqq"), 0x00AB, DirectionCommandToUnicode}, {QStringLiteral("flqq"), 0x00AB, DirectionBoth}, /** 0x00AC */ /** 0x00AD */ {QStringLiteral("textregistered"), 0x00AE, DirectionBoth}, /** 0x00AF */ {QStringLiteral("textdegree"), 0x00B0, DirectionBoth}, {QStringLiteral("textpm"), 0x00B1, DirectionBoth}, {QStringLiteral("textplusminus"), 0x00B1, DirectionCommandToUnicode}, /** 0x00B2 */ /** 0x00B3 */ /** 0x00B4 */ {QStringLiteral("textmu"), 0x00B5, DirectionBoth}, {QStringLiteral("textparagraph"), 0x00B6, DirectionBoth}, {QStringLiteral("textpilcrow"), 0x00B6, DirectionBoth}, {QStringLiteral("textperiodcentered"), 0x00B7, DirectionCommandToUnicode}, {QStringLiteral("textcdot"), 0x00B7, DirectionBoth}, {QStringLiteral("textcentereddot"), 0x00B7, DirectionCommandToUnicode}, /** 0x00B8 */ /** 0x00B9 */ 
{QStringLiteral("textordmasculine"), 0x00BA, DirectionBoth}, {QStringLiteral("guillemotright"), 0x00BB, DirectionCommandToUnicode}, {QStringLiteral("textfrqq"), 0x00BB, DirectionCommandToUnicode}, {QStringLiteral("frqq"), 0x00BB, DirectionBoth}, {QStringLiteral("textonequarter"), 0x00BC, DirectionBoth}, {QStringLiteral("textonehalf"), 0x00BD, DirectionBoth}, {QStringLiteral("textthreequarters"), 0x00BE, DirectionBoth}, {QStringLiteral("textquestiondown"), 0x00BF, DirectionCommandToUnicode}, // TODO /// recommended to write ?` instead of \textquestiondown {QStringLiteral("AA"), 0x00C5, DirectionBoth}, {QStringLiteral("AE"), 0x00C6, DirectionBoth}, {QStringLiteral("DH"), 0x00D0, DirectionBoth}, {QStringLiteral("texttimes"), 0x00D7, DirectionBoth}, {QStringLiteral("textmultiply"), 0x00D7, DirectionCommandToUnicode}, {QStringLiteral("O"), 0x00D8, DirectionBoth}, {QStringLiteral("TH"), 0x00DE, DirectionBoth}, {QStringLiteral("Thorn"), 0x00DE, DirectionCommandToUnicode}, {QStringLiteral("textThorn"), 0x00DE, DirectionCommandToUnicode}, {QStringLiteral("ss"), 0x00DF, DirectionBoth}, {QStringLiteral("aa"), 0x00E5, DirectionBoth}, {QStringLiteral("ae"), 0x00E6, DirectionBoth}, {QStringLiteral("dh"), 0x00F0, DirectionBoth}, {QStringLiteral("textdiv"), 0x00F7, DirectionBoth}, {QStringLiteral("textdivide"), 0x00F7, DirectionCommandToUnicode}, {QStringLiteral("o"), 0x00F8, DirectionBoth}, {QStringLiteral("th"), 0x00FE, DirectionBoth}, {QStringLiteral("textthorn"), 0x00FE, DirectionCommandToUnicode}, {QStringLiteral("textthornvari"), 0x00FE, DirectionCommandToUnicode}, {QStringLiteral("textthornvarii"), 0x00FE, DirectionCommandToUnicode}, {QStringLiteral("textthornvariii"), 0x00FE, DirectionCommandToUnicode}, {QStringLiteral("textthornvariv"), 0x00FE, DirectionCommandToUnicode}, {QStringLiteral("Aogonek"), 0x0104, DirectionCommandToUnicode}, {QStringLiteral("aogonek"), 0x0105, DirectionCommandToUnicode}, {QStringLiteral("DJ"), 0x0110, DirectionBoth}, {QStringLiteral("dj"), 
0x0111, DirectionBoth}, {QStringLiteral("textcrd"), 0x0111, DirectionCommandToUnicode}, {QStringLiteral("textHslash"), 0x0126, DirectionCommandToUnicode}, {QStringLiteral("textHbar"), 0x0126, DirectionCommandToUnicode}, {QStringLiteral("textcrh"), 0x0127, DirectionCommandToUnicode}, {QStringLiteral("texthbar"), 0x0127, DirectionCommandToUnicode}, {QStringLiteral("i"), 0x0131, DirectionBoth}, {QStringLiteral("IJ"), 0x0132, DirectionBoth}, {QStringLiteral("ij"), 0x0133, DirectionBoth}, {QStringLiteral("textkra"), 0x0138, DirectionCommandToUnicode}, {QStringLiteral("Lcaron"), 0x013D, DirectionCommandToUnicode}, {QStringLiteral("lcaron"), 0x013E, DirectionCommandToUnicode}, {QStringLiteral("L"), 0x0141, DirectionBoth}, {QStringLiteral("Lstroke"), 0x0141, DirectionCommandToUnicode}, {QStringLiteral("l"), 0x0142, DirectionBoth}, {QStringLiteral("lstroke"), 0x0142, DirectionCommandToUnicode}, {QStringLiteral("textbarl"), 0x0142, DirectionCommandToUnicode}, {QStringLiteral("NG"), 0x014A, DirectionBoth}, {QStringLiteral("ng"), 0x014B, DirectionBoth}, {QStringLiteral("OE"), 0x0152, DirectionBoth}, {QStringLiteral("oe"), 0x0153, DirectionBoth}, {QStringLiteral("Racute"), 0x0154, DirectionCommandToUnicode}, {QStringLiteral("racute"), 0x0155, DirectionCommandToUnicode}, {QStringLiteral("Sacute"), 0x015A, DirectionCommandToUnicode}, {QStringLiteral("sacute"), 0x015B, DirectionCommandToUnicode}, {QStringLiteral("Scedilla"), 0x015E, DirectionCommandToUnicode}, {QStringLiteral("scedilla"), 0x015F, DirectionCommandToUnicode}, {QStringLiteral("Scaron"), 0x0160, DirectionCommandToUnicode}, {QStringLiteral("scaron"), 0x0161, DirectionCommandToUnicode}, {QStringLiteral("Tcaron"), 0x0164, DirectionCommandToUnicode}, {QStringLiteral("tcaron"), 0x0165, DirectionCommandToUnicode}, {QStringLiteral("textTstroke"), 0x0166, DirectionCommandToUnicode}, {QStringLiteral("textTbar"), 0x0166, DirectionCommandToUnicode}, {QStringLiteral("textTslash"), 0x0166, DirectionCommandToUnicode}, 
{QStringLiteral("texttstroke"), 0x0167, DirectionCommandToUnicode}, {QStringLiteral("texttbar"), 0x0167, DirectionCommandToUnicode}, {QStringLiteral("texttslash"), 0x0167, DirectionCommandToUnicode}, {QStringLiteral("Zdotaccent"), 0x017B, DirectionCommandToUnicode}, {QStringLiteral("zdotaccent"), 0x017C, DirectionCommandToUnicode}, {QStringLiteral("Zcaron"), 0x017D, DirectionCommandToUnicode}, {QStringLiteral("zcaron"), 0x017E, DirectionCommandToUnicode}, {QStringLiteral("textlongs"), 0x017F, DirectionCommandToUnicode}, {QStringLiteral("textcrb"), 0x0180, DirectionCommandToUnicode}, {QStringLiteral("textBhook"), 0x0181, DirectionCommandToUnicode}, {QStringLiteral("texthausaB"), 0x0181, DirectionCommandToUnicode}, {QStringLiteral("textOopen"), 0x0186, DirectionCommandToUnicode}, {QStringLiteral("textChook"), 0x0187, DirectionCommandToUnicode}, {QStringLiteral("textchook"), 0x0188, DirectionCommandToUnicode}, {QStringLiteral("texthtc"), 0x0188, DirectionCommandToUnicode}, {QStringLiteral("textDafrican"), 0x0189, DirectionCommandToUnicode}, {QStringLiteral("textDhook"), 0x018A, DirectionCommandToUnicode}, {QStringLiteral("texthausaD"), 0x018A, DirectionCommandToUnicode}, {QStringLiteral("textEreversed"), 0x018E, DirectionCommandToUnicode}, {QStringLiteral("textrevE"), 0x018E, DirectionCommandToUnicode}, {QStringLiteral("textEopen"), 0x0190, DirectionCommandToUnicode}, {QStringLiteral("textFhook"), 0x0191, DirectionCommandToUnicode}, {QStringLiteral("textflorin"), 0x0192, DirectionBoth}, {QStringLiteral("textgamma"), 0x0194, DirectionCommandToUnicode}, {QStringLiteral("textGammaafrican"), 0x0194, DirectionCommandToUnicode}, {QStringLiteral("hv"), 0x0195, DirectionCommandToUnicode}, {QStringLiteral("texthvlig"), 0x0195, DirectionCommandToUnicode}, {QStringLiteral("textIotaafrican"), 0x0196, DirectionCommandToUnicode}, {QStringLiteral("textKhook"), 0x0198, DirectionCommandToUnicode}, {QStringLiteral("texthausaK"), 0x0198, DirectionCommandToUnicode}, 
{QStringLiteral("texthtk"), 0x0199, DirectionCommandToUnicode}, {QStringLiteral("textkhook"), 0x0199, DirectionCommandToUnicode}, {QStringLiteral("textbarl"), 0x019A, DirectionCommandToUnicode}, {QStringLiteral("textcrlambda"), 0x019B, DirectionCommandToUnicode}, {QStringLiteral("textNhookleft"), 0x019D, DirectionCommandToUnicode}, {QStringLiteral("textnrleg"), 0x019E, DirectionCommandToUnicode}, {QStringLiteral("textPUnrleg"), 0x019E, DirectionCommandToUnicode}, {QStringLiteral("Ohorn"), 0x01A0, DirectionCommandToUnicode}, {QStringLiteral("ohorn"), 0x01A1, DirectionCommandToUnicode}, {QStringLiteral("textPhook"), 0x01A4, DirectionCommandToUnicode}, {QStringLiteral("texthtp"), 0x01A5, DirectionCommandToUnicode}, {QStringLiteral("textphook"), 0x01A5, DirectionCommandToUnicode}, {QStringLiteral("ESH"), 0x01A9, DirectionCommandToUnicode}, {QStringLiteral("textEsh"), 0x01A9, DirectionCommandToUnicode}, {QStringLiteral("textlooptoprevsh"), 0x01AA, DirectionCommandToUnicode}, {QStringLiteral("textlhtlongi"), 0x01AA, DirectionCommandToUnicode}, {QStringLiteral("textlhookt"), 0x01AB, DirectionCommandToUnicode}, {QStringLiteral("textThook"), 0x01AC, DirectionCommandToUnicode}, {QStringLiteral("textthook"), 0x01AD, DirectionCommandToUnicode}, {QStringLiteral("texthtt"), 0x01AD, DirectionCommandToUnicode}, {QStringLiteral("textTretroflexhook"), 0x01AE, DirectionCommandToUnicode}, {QStringLiteral("Uhorn"), 0x01AF, DirectionCommandToUnicode}, {QStringLiteral("uhorn"), 0x01B0, DirectionCommandToUnicode}, {QStringLiteral("textupsilon"), 0x01B1, DirectionCommandToUnicode}, {QStringLiteral("textVhook"), 0x01B2, DirectionCommandToUnicode}, {QStringLiteral("textYhook"), 0x01B3, DirectionCommandToUnicode}, {QStringLiteral("textvhook"), 0x01B4, DirectionCommandToUnicode}, {QStringLiteral("Zbar"), 0x01B5, DirectionCommandToUnicode}, {QStringLiteral("zbar"), 0x01B6, DirectionCommandToUnicode}, {QStringLiteral("EZH"), 0x01B7, DirectionCommandToUnicode}, {QStringLiteral("textEzh"), 0x01B7, 
DirectionCommandToUnicode}, {QStringLiteral("LJ"), 0x01C7, DirectionCommandToUnicode}, {QStringLiteral("Lj"), 0x01C8, DirectionCommandToUnicode}, {QStringLiteral("lj"), 0x01C9, DirectionCommandToUnicode}, {QStringLiteral("NJ"), 0x01CA, DirectionCommandToUnicode}, {QStringLiteral("Nj"), 0x01CB, DirectionCommandToUnicode}, {QStringLiteral("nj"), 0x01CC, DirectionCommandToUnicode}, {QStringLiteral("DZ"), 0x01F1, DirectionCommandToUnicode}, {QStringLiteral("Dz"), 0x01F2, DirectionCommandToUnicode}, {QStringLiteral("dz"), 0x01F3, DirectionCommandToUnicode}, {QStringLiteral("HV"), 0x01F6, DirectionCommandToUnicode}, {QStringLiteral("j"), 0x0237, DirectionBoth}, {QStringLiteral("ldots"), 0x2026, DirectionBoth}, {QStringLiteral("grqq"), 0x201C, DirectionCommandToUnicode}, {QStringLiteral("textquotedblleft"), 0x201C, DirectionCommandToUnicode}, {QStringLiteral("rqq"), 0x201D, DirectionCommandToUnicode}, {QStringLiteral("textquotedblright"), 0x201D, DirectionCommandToUnicode}, {QStringLiteral("glqq"), 0x201E, DirectionCommandToUnicode}, {QStringLiteral("SS"), 0x1E9E, DirectionBoth}, {QStringLiteral("textendash"), 0x2013, DirectionCommandToUnicode}, {QStringLiteral("textemdash"), 0x2014, DirectionCommandToUnicode}, {QStringLiteral("textquoteleft"), 0x2018, DirectionCommandToUnicode}, {QStringLiteral("lq"), 0x2018, DirectionBoth}, {QStringLiteral("textquoteright"), 0x2019, DirectionCommandToUnicode}, {QStringLiteral("rq"), 0x2019, DirectionBoth}, ///< tricky one: 'r' is a valid modifier {QStringLiteral("quotesinglbase"), 0x201A, DirectionBoth}, {QStringLiteral("quotedblbase"), 0x201E, DirectionBoth}, {QStringLiteral("textbullet "), 0x2022, DirectionBoth}, {QStringLiteral("guilsinglleft "), 0x2039, DirectionBoth}, {QStringLiteral("guilsinglright "), 0x203A, DirectionBoth}, {QStringLiteral("textcelsius"), 0x2103, DirectionBoth}, {QStringLiteral("textleftarrow"), 0x2190, DirectionBoth}, {QStringLiteral("textuparrow"), 0x2191, DirectionBoth}, {QStringLiteral("textrightarrow"), 
0x2192, DirectionBoth}, {QStringLiteral("textdownarrow"), 0x2193, DirectionBoth} }; const QChar EncoderLaTeX::encoderLaTeXProtectedSymbols[] = {QLatin1Char('#'), QLatin1Char('&'), QLatin1Char('%')}; const QChar EncoderLaTeX::encoderLaTeXProtectedTextOnlySymbols[] = {QLatin1Char('_')}; /** * This data structure holds LaTeX symbol sequences (without * any backslash) that represent a single Unicode character. * For example, it maps --- to an 'em dash' and back. * The structure is a table with two columns: (1) the symbol * sequence (in the example before the '---') (2) the Unicode * character described by a hexcode. */ static const struct EncoderLaTeXSymbolSequence { const QString latex; const ushort unicode; const EncoderLaTeXCommandDirection direction; } encoderLaTeXSymbolSequences[] = { {QStringLiteral("!`"), 0x00A1, DirectionBoth}, {QStringLiteral("\"<"), 0x00AB, DirectionBoth}, {QStringLiteral("\">"), 0x00BB, DirectionBoth}, {QStringLiteral("?`"), 0x00BF, DirectionBoth}, {QStringLiteral("---"), 0x2014, DirectionBoth}, ///< --- must come before -- {QStringLiteral("--"), 0x2013, DirectionBoth}, {QStringLiteral("``"), 0x201C, DirectionBoth}, {QStringLiteral("''"), 0x201D, DirectionBoth}, {QStringLiteral("ff"), 0xFB00, DirectionUnicodeToCommand}, {QStringLiteral("fi"), 0xFB01, DirectionUnicodeToCommand}, {QStringLiteral("fl"), 0xFB02, DirectionUnicodeToCommand}, {QStringLiteral("ffi"), 0xFB03, DirectionUnicodeToCommand}, {QStringLiteral("ffl"), 0xFB04, DirectionUnicodeToCommand}, {QStringLiteral("ft"), 0xFB05, DirectionUnicodeToCommand}, {QStringLiteral("st"), 0xFB06, DirectionUnicodeToCommand} }; EncoderLaTeX::EncoderLaTeX() : Encoder() { /// Initialize lookup table with NULL pointers for (int i = 0; i < lookupTableNumModifiers; ++i) lookupTable[i] = nullptr; int lookupTableCount = 0; /// Go through all table rows of encoderLaTeXEscapedCharacters for (const EncoderLaTeXEscapedCharacter &encoderLaTeXEscapedCharacter : encoderLaTeXEscapedCharacters) { /// Check if this 
row's modifier is already known bool knownModifier = false; int j; for (j = lookupTableCount - 1; j >= 0; --j) { knownModifier |= lookupTable[j]->modifier == encoderLaTeXEscapedCharacter.modifier; if (knownModifier) break; } if (!knownModifier) { /// Ok, this row's modifier appeared for the first time, /// therefore initialize memory structure, i.e. row in lookupTable lookupTable[lookupTableCount] = new EncoderLaTeXEscapedCharacterLookupTableRow; lookupTable[lookupTableCount]->modifier = encoderLaTeXEscapedCharacter.modifier; /// If no special character is known for a letter+modifier /// combination, fall back using the ASCII character only for (ushort k = 0; k < 26; ++k) { lookupTable[lookupTableCount]->unicode[k] = QChar(QLatin1Char('A').unicode() + k); lookupTable[lookupTableCount]->unicode[k + 26] = QChar(QLatin1Char('a').unicode() + k); } for (ushort k = 0; k < 10; ++k) lookupTable[lookupTableCount]->unicode[k + 52] = QChar(QLatin1Char('0').unicode() + k); j = lookupTableCount; ++lookupTableCount; } /// Add the letter as of the current row in encoderLaTeXEscapedCharacters /// into Unicode char array in the current modifier's row in the lookup table. 
int pos = -1; if ((pos = asciiLetterOrDigitToPos(encoderLaTeXEscapedCharacter.letter)) >= 0) lookupTable[j]->unicode[pos] = QChar(encoderLaTeXEscapedCharacter.unicode); else qCWarning(LOG_KBIBTEX_IO) << "Cannot handle letter " << encoderLaTeXEscapedCharacter.letter; } } EncoderLaTeX::~EncoderLaTeX() { /// Clean-up memory for (int i = lookupTableNumModifiers - 1; i >= 0; --i) if (lookupTable[i] != nullptr) delete lookupTable[i]; } QString EncoderLaTeX::decode(const QString &input) const { const int len = input.length(); QString output; output.reserve(len); bool inMathMode = false; int cachedAsciiLetterOrDigitToPos = -1; /// Go through input char by char for (int i = 0; i < len; ++i) { /** * Repeatedly check if input data contains a verbatim command * like \url{...}, copy it to output, and update i to point * to the next character after the verbatim command. */ while (testAndCopyVerbatimCommands(input, i, output)); if (i >= len) break; /// Fetch current input char const QChar c = input[i]; if (c == QLatin1Char('{')) { /// First case: An opening curly bracket, /// which is harmless (see else case), unless ... if (i < len - 3 && input[i + 1] == QLatin1Char('\\')) { /// ... 
it continues with a backslash /// Next, check if there follows a modifier after the backslash /// For example an quotation mark as used in {\"a} const int lookupTablePos = modifierInLookupTable(input[i + 2].toLatin1()); /// Check for spaces between modifier and character, for example /// like {\H o} int skipSpaces = 0; while (i + 3 + skipSpaces < len && input[i + 3 + skipSpaces] == QLatin1Char(' ') && skipSpaces < 16) ++skipSpaces; if (lookupTablePos >= 0 && i + skipSpaces < len - 4 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 3 + skipSpaces])) >= 0 && input[i + 4 + skipSpaces] == QLatin1Char('}')) { /// If we found a modifier which is followed by /// a letter followed by a closing curly bracket, /// we are looking at something like {\"A} /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 5 + skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 5 + skipSpaces); } else output.append(unicodeLetter); /// Step over those additional characters i += 4 + skipSpaces; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 5 && input[i + 3 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 4 + skipSpaces]) && input[i + 5 + skipSpaces] == QLatin1Char('}')) { /// This is the case for {\'\i} or alike. 
bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 4 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 2]) { output.append(QChar(dotlessIJCharacter.unicode)); i += 5 + skipSpaces; found = true; break; } if (!found) qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 2] << "BACKSLASH" << input[i + 4 + skipSpaces]; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 6 && input[i + 3 + skipSpaces] == QLatin1Char('{') && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 4 + skipSpaces])) >= 0 && input[i + 5 + skipSpaces] == QLatin1Char('}') && input[i + 6 + skipSpaces] == QLatin1Char('}')) { /// If we found a modifier which is followed by /// an opening curly bracket followed by a letter /// followed by two closing curly brackets, /// we are looking at something like {\"{A}} /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 7 + skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 7 + skipSpaces); } else output.append(unicodeLetter); /// Step over those additional characters i += 6 + skipSpaces; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 7 && input[i + 3 + skipSpaces] == QLatin1Char('{') && input[i + 4 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 5 + skipSpaces]) && input[i + 6 + skipSpaces] == QLatin1Char('}') && input[i + 7 + skipSpaces] == QLatin1Char('}')) { /// This is the case for {\'{\i}} or alike. 
bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 5 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 2]) { output.append(QChar(dotlessIJCharacter.unicode)); i += 7 + skipSpaces; found = true; break; } if (!found) qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 2] << "BACKSLASH {" << input[i + 5 + skipSpaces] << "}"; } else { /// Now, the case of something like {\AA} is left /// to check for const QString alpha = readAlphaCharacters(input, i + 2); int nextPosAfterAlpha = i + 2 + alpha.size(); if (nextPosAfterAlpha < input.length() && input[nextPosAfterAlpha] == QLatin1Char('}')) { /// We are dealing actually with a string like {\AA} /// Check which command it is, /// insert corresponding Unicode character bool foundCommand = false; for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) { if (encoderLaTeXCharacterCommand.command == alpha) { output.append(QChar(encoderLaTeXCharacterCommand.unicode)); foundCommand = true; break; } } /// Check if a math command has been read, /// like \subset /// (automatically skipped if command was found above) for (const MathCommand &mathCommand : mathCommands) { if (mathCommand.command == alpha) { if (output.endsWith(QStringLiteral("\\ensuremath"))) { /// Remove "\ensuremath" right before this math command, /// it will be re-inserted when exporting/saving the document output = output.left(output.length() - 11); } output.append(QChar(mathCommand.unicode)); foundCommand = true; break; } } if (foundCommand) i = nextPosAfterAlpha; else { /// Dealing with a string line {\noopsort} /// (see BibTeX documentation where this gets explained) output.append(c); } } else { /// Could be something like {\tt filename.txt} /// Keep it as it is output.append(c); } } } else { /// Nothing special, copy input char to output output.append(c); } } else if (c == QLatin1Char('\\') && i < len - 
1) { /// Second case: A backslash as in \"o /// Sometimes such command are closed with just {}, /// so remember if to check for that bool checkForExtraCurlyAtEnd = false; /// Check if there follows a modifier after the backslash /// For example an quotation mark as used in \"a const int lookupTablePos = modifierInLookupTable(input[i + 1]); /// Check for spaces between modifier and character, for example /// like \H o int skipSpaces = 0; while (i + 2 + skipSpaces < len && input[i + 2 + skipSpaces] == QLatin1Char(' ') && skipSpaces < 16) ++skipSpaces; if (lookupTablePos >= 0 && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0 && (i + skipSpaces == len - 3 || input[i + 1] == QLatin1Char('"') || input[i + 1] == QLatin1Char('\'') || input[i + 1] == QLatin1Char('`') || input[i + 1] == QLatin1Char('='))) { // TODO more special cases? /// We found a special modifier which is followed by /// a letter followed by normal text without any /// delimiter, so we are looking at something like /// \"u inside Kr\"uger /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 3 + skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 3 + skipSpaces); } else output.append(unicodeLetter); /// Step over those additional characters i += 2 + skipSpaces; } else if (lookupTablePos >= 0 && i + skipSpaces <= len - 3 && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0 && (i + skipSpaces == len - 3 || input[i + 3 + skipSpaces] == QLatin1Char('}') || input[i + 3 + skipSpaces] == QLatin1Char('{') || input[i + 3 + skipSpaces] == QLatin1Char(' ') || input[i 
+ 3 + skipSpaces] == QLatin1Char('\t') || input[i + 3 + skipSpaces] == QLatin1Char('\\') || input[i + 3 + skipSpaces] == QLatin1Char('\r') || input[i + 3 + skipSpaces] == QLatin1Char('\n'))) { /// We found a modifier which is followed by /// a letter followed by a command delimiter such /// as a whitespace, so we are looking at something /// like \"u followed by a space /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 3)); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 3); } else output.append(unicodeLetter); /// Step over those additional characters i += 2 + skipSpaces; /// Now, after this command, a whitespace may follow /// which has to get "eaten" as it acts as a command /// delimiter if (input[i + 1] == QLatin1Char(' ') || input[i + 1] == QLatin1Char('\r') || input[i + 1] == QLatin1Char('\n')) ++i; else { /// If no whitespace follows, still /// check for extra curly brackets checkForExtraCurlyAtEnd = true; } } else if (lookupTablePos >= 0 && i + skipSpaces < len - 4 && input[i + 2 + skipSpaces] == QLatin1Char('{') && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 3 + skipSpaces])) >= 0 && input[i + 4 + skipSpaces] == QLatin1Char('}')) { /// We found a modifier which is followed by an opening /// curly bracket followed a letter followed by a closing /// curly bracket, so we are looking at something /// like \"{u} /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 5 + 
skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 5 + skipSpaces); } else output.append(unicodeLetter); /// Step over those additional characters i += 4 + skipSpaces; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 3 && input[i + 2 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 3 + skipSpaces])) { /// This is the case for \'\i or alike. bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 3 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 1]) { output.append(QChar(dotlessIJCharacter.unicode)); i += 3 + skipSpaces; found = true; break; } if (!found) qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 1] << "BACKSLASH" << input[i + 3 + skipSpaces]; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 5 && input[i + 2 + skipSpaces] == QLatin1Char('{') && input[i + 3 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 4 + skipSpaces]) && input[i + 5 + skipSpaces] == QLatin1Char('}')) { /// This is the case for \'{\i} or alike. 
bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 4 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 1]) { output.append(QChar(dotlessIJCharacter.unicode)); i += 5 + skipSpaces; found = true; break; } if (!found) qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 1] << "BACKSLASH {" << input[i + 4 + skipSpaces] << "}"; } else if (i < len - 1) { /// Now, the case of something like \AA is left /// to check for const QString alpha = readAlphaCharacters(input, i + 1); int nextPosAfterAlpha = i + 1 + alpha.size(); if (alpha.size() >= 1 && alpha.at(0).isLetter()) { /// We are dealing actually with a string like \AA or \o /// Check which command it is, /// insert corresponding Unicode character bool foundCommand = false; for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) { if (encoderLaTeXCharacterCommand.command == alpha) { output.append(QChar(encoderLaTeXCharacterCommand.unicode)); foundCommand = true; break; } } if (foundCommand) { /// Now, after a command, a whitespace may follow /// which has to get "eaten" as it acts as a command /// delimiter if (nextPosAfterAlpha < input.length() && (input[nextPosAfterAlpha] == QLatin1Char(' ') || input[nextPosAfterAlpha] == QLatin1Char('\r') || input[nextPosAfterAlpha] == QLatin1Char('\n'))) ++nextPosAfterAlpha; else { /// If no whitespace follows, still /// check for extra curly brackets checkForExtraCurlyAtEnd = true; } i = nextPosAfterAlpha - 1; } else { /// No command found? 
Just copy input char to output output.append(c); } } else { /// Maybe we are dealing with a string like \& or \_ /// Check which command it is bool foundCommand = false; for (const QChar &encoderLaTeXProtectedSymbol : encoderLaTeXProtectedSymbols) if (encoderLaTeXProtectedSymbol == input[i + 1]) { output.append(encoderLaTeXProtectedSymbol); foundCommand = true; break; } if (!foundCommand && !inMathMode) for (const QChar &encoderLaTeXProtectedTextOnlySymbol : encoderLaTeXProtectedTextOnlySymbols) if (encoderLaTeXProtectedTextOnlySymbol == input[i + 1]) { output.append(encoderLaTeXProtectedTextOnlySymbol); foundCommand = true; break; } /// If command has been found, nothing has to be done /// except for hopping over this backslash if (foundCommand) ++i; else if (i < len - 1 && input[i + 1] == QChar(0x002c /* comma */)) { /// Found a thin space: \, /// Replacing Latex-like thin space with Unicode thin space output.append(QChar(0x2009)); // foundCommand = true; ///< only necessary if more tests will follow in the future ++i; } else { /// Nothing special, copy input char to output output.append(c); } } } else { /// Nothing special, copy input char to output output.append(c); } /// Finally, check if there may be extra curly brackets /// like {} and hop over them if (checkForExtraCurlyAtEnd && i < len - 2 && input[i + 1] == QLatin1Char('{') && input[i + 2] == QLatin1Char('}')) i += 2; } else { /// So far, no opening curly bracket and no backslash /// May still be a symbol sequence like --- bool isSymbolSequence = false; /// Go through all known symbol sequnces for (const EncoderLaTeXSymbolSequence &encoderLaTeXSymbolSequence : encoderLaTeXSymbolSequences) { /// First, check if read input character matches beginning of symbol sequence /// and input buffer as enough characters left to potentially contain /// symbol sequence const int latexLen = encoderLaTeXSymbolSequence.latex.length(); if ((encoderLaTeXSymbolSequence.direction & DirectionCommandToUnicode) && 
encoderLaTeXSymbolSequence.latex[0] == c && i <= len - latexLen) { /// Now actually check if symbol sequence is in input buffer isSymbolSequence = true; for (int p = 1; isSymbolSequence && p < latexLen; ++p) isSymbolSequence &= encoderLaTeXSymbolSequence.latex[p] == input[i + p]; if (isSymbolSequence) { /// Ok, found sequence: insert Unicode character in output /// and hop over sequence in input buffer output.append(QChar(encoderLaTeXSymbolSequence.unicode)); i += encoderLaTeXSymbolSequence.latex.length() - 1; break; } } } if (!isSymbolSequence) { /// No symbol sequence found, so just copy input to output output.append(c); /// Still, check if input character is a dollar sign /// without a preceding backslash, means toggling between /// text mode and math mode if (c == QLatin1Char('$') && (i == 0 || input[i - 1] != QLatin1Char('\\'))) inMathMode = !inMathMode; } } } output.squeeze(); return output; }

/**
 * Test whether a verbatim command (currently only \url{...}) starts at
 * position @p pos of @p input. If it does, the command, its argument
 * and the matching closing curly bracket are appended unmodified to
 * @p output, and @p pos is moved to the first character behind the
 * copied text. If the closing curly bracket is missing, everything up
 * to the end of @p input is copied.
 *
 * @param input text to inspect
 * @param pos position to test; advanced past the copied text on success
 * @param output string the verbatim text gets appended to
 * @return true if a verbatim command was found and copied, false otherwise
 */
bool EncoderLaTeX::testAndCopyVerbatimCommands(const QString &input, int &pos, QString &output) const
{
    const int len = input.length();
    int copyBytesCount = 0;
    int openedClosedCurlyBrackets = 0;

    /// check for \url
    if (pos < len - 6 && input.mid(pos, 5) == QStringLiteral("\\url{")) {
        copyBytesCount = 5;
        openedClosedCurlyBrackets = 1;
    }

    if (copyBytesCount > 0) {
        /// Inspect every character *before* counting it as copied:
        /// this way the first character of the argument is examined, too
        /// (so that \url{} is recognized as complete), and the index
        /// never reaches input.length()
        while (openedClosedCurlyBrackets > 0 && pos + copyBytesCount < len) {
            const QChar current = input[pos + copyBytesCount];
            /// A curly bracket directly preceded by a backslash
            /// does not change the nesting depth
            const bool escaped = input[pos + copyBytesCount - 1] == QLatin1Char('\\');
            if (!escaped) {
                if (current == QLatin1Char('{'))
                    ++openedClosedCurlyBrackets;
                else if (current == QLatin1Char('}'))
                    --openedClosedCurlyBrackets;
            }
            ++copyBytesCount;
        }

        output.append(input.midRef(pos, copyBytesCount));
        pos += copyBytesCount;
    }

    return copyBytesCount > 0;
}

QString EncoderLaTeX::encode(const QString &ninput, const TargetEncoding targetEncoding) const { /// Perform Canonical Decomposition followed by Canonical Composition const QString input = ninput.normalized(QString::NormalizationForm_C); int len =
input.length(); QString output; output.reserve(len); bool inMathMode = false; /// Go through input char by char for (int i = 0; i < len; ++i) { /** * Repeatedly check if input data contains a verbatim command * like \url{...}, append it to output, and update i to point * to the next character after the verbatim command. */ while (testAndCopyVerbatimCommands(input, i, output)); if (i >= len) break; const QChar c = input[i]; if (targetEncoding == TargetEncodingASCII && c.unicode() > 127) { /// If current char is outside ASCII boundaries ... bool found = false; /// Handle special cases of i without a dot (\i) for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (c.unicode() == dotlessIJCharacter.unicode && (dotlessIJCharacter.direction & DirectionUnicodeToCommand)) { output.append(QString(QStringLiteral("{\\%1\\%2}")).arg(dotlessIJCharacter.modifier, dotlessIJCharacter.letter)); found = true; break; } if (!found) { /// ... test if there is a symbol sequence like --- /// to encode it for (const EncoderLaTeXSymbolSequence &encoderLaTeXSymbolSequence : encoderLaTeXSymbolSequences) if (encoderLaTeXSymbolSequence.unicode == c.unicode() && (encoderLaTeXSymbolSequence.direction & DirectionUnicodeToCommand)) { for (int l = 0; l < encoderLaTeXSymbolSequence.latex.length(); ++l) output.append(encoderLaTeXSymbolSequence.latex[l]); found = true; break; } } if (!found) { /// Ok, no symbol sequence. Let's test character /// commands like \ss for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) if (encoderLaTeXCharacterCommand.unicode == c.unicode() && (encoderLaTeXCharacterCommand.direction & DirectionUnicodeToCommand)) { output.append(QString(QStringLiteral("{\\%1}")).arg(encoderLaTeXCharacterCommand.command)); found = true; break; } } if (!found) { /// Ok, neither a character command. 
Let's test /// escaped characters with modifiers like \"a for (const EncoderLaTeXEscapedCharacter &encoderLaTeXEscapedCharacter : encoderLaTeXEscapedCharacters) if (encoderLaTeXEscapedCharacter.unicode == c.unicode() && (encoderLaTeXEscapedCharacter.direction & DirectionUnicodeToCommand)) { const QString formatString = isAsciiLetter(encoderLaTeXEscapedCharacter.modifier) ? QStringLiteral("{\\%1 %2}") : QStringLiteral("{\\%1%2}"); output.append(formatString.arg(encoderLaTeXEscapedCharacter.modifier).arg(encoderLaTeXEscapedCharacter.letter)); found = true; break; } } if (!found) { /// Ok, test for math commands for (const MathCommand &mathCommand : mathCommands) if (mathCommand.unicode == c.unicode() && (mathCommand.direction & DirectionUnicodeToCommand)) { if (inMathMode) output.append(QString(QStringLiteral("\\%1{}")).arg(mathCommand.command)); else output.append(QString(QStringLiteral("\\ensuremath{\\%1}")).arg(mathCommand.command)); found = true; break; } } if (!found && c.unicode() == 0x2009) { /// Thin space output.append(QStringLiteral("\\,")); found = true; } if (!found) { qCWarning(LOG_KBIBTEX_IO) << "Don't know how to encode Unicode char" << QString(QStringLiteral("0x%1")).arg(c.unicode(), 4, 16, QLatin1Char('0')); output.append(c); } } else { /// Current character is normal ASCII /// and targetEncoding was set to accept only ASCII characters /// -- or -- targetEncoding was set to accept UTF-8 characters /// Still, some characters have special meaning /// in TeX and have to be preceded with a backslash bool found = false; for (const QChar &encoderLaTeXProtectedSymbol : encoderLaTeXProtectedSymbols) if (encoderLaTeXProtectedSymbol == c) { output.append(QLatin1Char('\\')); found = true; break; } if (!found && !inMathMode) for (const QChar &encoderLaTeXProtectedTextOnlySymbol : encoderLaTeXProtectedTextOnlySymbols) if (encoderLaTeXProtectedTextOnlySymbol == c) { output.append(QLatin1Char('\\')); break; } /// Dump character to output output.append(c); /// 
Finally, check if input character is a dollar sign /// without a preceding backslash, means toggling between /// text mode and math mode if (c == QLatin1Char('$') && (i == 0 || input[i - 1] != QLatin1Char('\\'))) inMathMode = !inMathMode; } } output.squeeze(); return output; } int EncoderLaTeX::modifierInLookupTable(const QChar modifier) const { for (int m = 0; m < lookupTableNumModifiers && lookupTable[m] != nullptr; ++m) if (lookupTable[m]->modifier == modifier) return m; return -1; } QString EncoderLaTeX::readAlphaCharacters(const QString &base, int startFrom) const { const int len = base.size(); for (int j = startFrom; j < len; ++j) { if (!isAsciiLetter(base[j])) return base.mid(startFrom, j - startFrom); } return base.mid(startFrom); } const EncoderLaTeX &EncoderLaTeX::instance() { static const EncoderLaTeX self; return self; } diff --git a/src/io/fileimporterbibtex.cpp b/src/io/fileimporterbibtex.cpp index f0fb8e07..5989d206 100644 --- a/src/io/fileimporterbibtex.cpp +++ b/src/io/fileimporterbibtex.cpp @@ -1,1327 +1,1327 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . 
* ***************************************************************************/ #include "fileimporterbibtex.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "encoder.h" #include "encoderlatex.h" #include "logging_io.h" #define qint64toint(a) (static_cast(qMax(0LL,qMin(0x7fffffffLL,(a))))) FileImporterBibTeX::FileImporterBibTeX(QObject *parent) : FileImporter(parent), m_cancelFlag(false), m_textStream(nullptr), m_commentHandling(IgnoreComments), m_keywordCasing(KBibTeX::cLowerCase), m_lineNo(1) { m_keysForPersonDetection.append(Entry::ftAuthor); m_keysForPersonDetection.append(Entry::ftEditor); m_keysForPersonDetection.append(QStringLiteral("bookauthor")); /// used by JSTOR } File *FileImporterBibTeX::load(QIODevice *iodevice) { m_cancelFlag = false; if (!iodevice->isReadable() && !iodevice->open(QIODevice::ReadOnly)) { qCWarning(LOG_KBIBTEX_IO) << "Input device not readable"; emit message(SeverityError, QStringLiteral("Input device not readable")); return nullptr; } File *result = new File(); /// Used to determine if file prefers quotation marks over /// curly brackets or the other way around m_statistics.countCurlyBrackets = 0; m_statistics.countQuotationMarks = 0; m_statistics.countFirstNameFirst = 0; m_statistics.countLastNameFirst = 0; m_statistics.countNoCommentQuote = 0; m_statistics.countCommentPercent = 0; m_statistics.countCommentCommand = 0; m_statistics.countProtectedTitle = 0; m_statistics.countUnprotectedTitle = 0; m_statistics.mostRecentListSeparator.clear(); m_textStream = new QTextStream(iodevice); m_textStream->setCodec(Preferences::defaultBibTeXEncoding.toLatin1()); ///< unless we learn something else, assume default codec result->setProperty(File::Encoding, Preferences::defaultBibTeXEncoding); QString rawText; rawText.reserve(qint64toint(iodevice->size())); while (!m_textStream->atEnd()) { QString line = m_textStream->readLine(); bool skipline = 
evaluateParameterComments(m_textStream, line.toLower(), result); // FIXME XML data should be removed somewhere else? onlinesearch ... if (line.startsWith(QStringLiteral(""))) /// Hop over XML declarations skipline = true; if (!skipline) rawText.append(line).append("\n"); } delete m_textStream; /** Remove HTML code from the input source */ // FIXME HTML data should be removed somewhere else? onlinesearch ... const int originalLength = rawText.length(); rawText = rawText.remove(KBibTeX::htmlRegExp); const int afterHTMLremovalLength = rawText.length(); if (originalLength != afterHTMLremovalLength) { qCInfo(LOG_KBIBTEX_IO) << (originalLength - afterHTMLremovalLength) << "characters of HTML tags have been removed"; emit message(SeverityInfo, QString(QStringLiteral("%1 characters of HTML tags have been removed")).arg(originalLength - afterHTMLremovalLength)); } // TODO really necessary to pipe data through several QTextStreams? m_textStream = new QTextStream(&rawText, QIODevice::ReadOnly); m_textStream->setCodec(Preferences::defaultBibTeXEncoding.toLower() == QStringLiteral("latex") ? 
"us-ascii" : Preferences::defaultBibTeXEncoding.toLatin1()); m_lineNo = 1; m_prevLine = m_currentLine = QString(); m_knownElementIds.clear(); readChar(); while (!m_nextChar.isNull() && !m_cancelFlag && !m_textStream->atEnd()) { emit progress(qint64toint(m_textStream->pos()), rawText.length()); Element *element = nextElement(); if (element != nullptr) { if (m_commentHandling == KeepComments || !Comment::isComment(*element)) result->append(QSharedPointer(element)); else delete element; } } emit progress(100, 100); if (m_cancelFlag) { qCWarning(LOG_KBIBTEX_IO) << "Loading bibliography data has been canceled"; emit message(SeverityError, QStringLiteral("Loading bibliography data has been canceled")); delete result; result = nullptr; } delete m_textStream; if (result != nullptr) { /// Set the file's preferences for string delimiters /// deduced from statistics built while parsing the file result->setProperty(File::StringDelimiter, m_statistics.countQuotationMarks > m_statistics.countCurlyBrackets ? QStringLiteral("\"\"") : QStringLiteral("{}")); /// Set the file's preferences for name formatting result->setProperty(File::NameFormatting, m_statistics.countFirstNameFirst > m_statistics.countLastNameFirst ? Preferences::personNameFormatFirstLast : Preferences::personNameFormatLastFirst); /// Set the file's preferences for title protected Qt::CheckState triState = (m_statistics.countProtectedTitle > m_statistics.countUnprotectedTitle * 4) ? Qt::Checked : ((m_statistics.countProtectedTitle * 4 < m_statistics.countUnprotectedTitle) ? 
Qt::Unchecked : Qt::PartiallyChecked); result->setProperty(File::ProtectCasing, static_cast(triState)); /// Set the file's preferences for quoting of comments if (m_statistics.countNoCommentQuote > m_statistics.countCommentCommand && m_statistics.countNoCommentQuote > m_statistics.countCommentPercent) result->setProperty(File::QuoteComment, static_cast(Preferences::qcNone)); else if (m_statistics.countCommentCommand > m_statistics.countNoCommentQuote && m_statistics.countCommentCommand > m_statistics.countCommentPercent) result->setProperty(File::QuoteComment, static_cast(Preferences::qcCommand)); else result->setProperty(File::QuoteComment, static_cast(Preferences::qcPercentSign)); if (!m_statistics.mostRecentListSeparator.isEmpty()) result->setProperty(File::ListSeparator, m_statistics.mostRecentListSeparator); // TODO gather more statistics for keyword casing etc. } iodevice->close(); return result; } bool FileImporterBibTeX::guessCanDecode(const QString &rawText) { static const QRegularExpression bibtexLikeText(QStringLiteral("@\\w+\\{.+\\}")); QString text = EncoderLaTeX::instance().decode(rawText); return bibtexLikeText.match(text).hasMatch(); } void FileImporterBibTeX::cancel() { m_cancelFlag = true; } Element *FileImporterBibTeX::nextElement() { Token token = nextToken(); if (token == tAt) { const QString elementType = readSimpleString(); const QString elementTypeLower = elementType.toLower(); if (elementTypeLower == QStringLiteral("comment")) { ++m_statistics.countCommentCommand; return readCommentElement(); } else if (elementTypeLower == QStringLiteral("string")) return readMacroElement(); else if (elementTypeLower == QStringLiteral("preamble")) return readPreambleElement(); else if (elementTypeLower == QStringLiteral("import")) { qCDebug(LOG_KBIBTEX_IO) << "Skipping potential HTML/JavaScript @import statement near line" << m_lineNo; emit message(SeverityInfo, QString(QStringLiteral("Skipping potential HTML/JavaScript @import statement near line 
%1")).arg(m_lineNo)); return nullptr; } else if (!elementType.isEmpty()) return readEntryElement(elementType); else { qCWarning(LOG_KBIBTEX_IO) << "Element type after '@' is empty or invalid near line" << m_lineNo; emit message(SeverityError, QString(QStringLiteral("Element type after '@' is empty or invalid near line %1")).arg(m_lineNo)); return nullptr; } } else if (token == tUnknown && m_nextChar == QLatin1Char('%')) { /// do not complain about LaTeX-like comments, just eat them ++m_statistics.countCommentPercent; return readPlainCommentElement(QString()); } else if (token == tUnknown) { if (m_nextChar.isLetter()) { qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << m_nextChar << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << ")" << ", treating as comment"; emit message(SeverityInfo, QString(QStringLiteral("Unknown character '%1' near line %2, treating as comment")).arg(m_nextChar).arg(m_lineNo)); } else if (m_nextChar.isPrint()) { qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << m_nextChar << "(" << QString(QStringLiteral("0x%1")).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')) << ") near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << ")" << ", treating as comment"; emit message(SeverityInfo, QString(QStringLiteral("Unknown character '%1' (0x%2) near line %3, treating as comment")).arg(m_nextChar).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(m_lineNo)); } else { qCDebug(LOG_KBIBTEX_IO) << "Unknown character" << QString(QStringLiteral("0x%1")).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')) << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << ")" << ", treating as comment"; emit message(SeverityInfo, QString(QStringLiteral("Unknown character 0x%1 near line %2, treating as comment")).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(m_lineNo)); } ++m_statistics.countNoCommentQuote; return readPlainCommentElement(QString(m_prevChar) + m_nextChar); } if (token != tEOF) { 
qCWarning(LOG_KBIBTEX_IO) << "Don't know how to parse next token of type" << tokenidToString(token) << "in line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << ")" << endl; emit message(SeverityError, QString(QStringLiteral("Don't know how to parse next token of type %1 in line %2")).arg(tokenidToString(token)).arg(m_lineNo)); } return nullptr; } Comment *FileImporterBibTeX::readCommentElement() { if (!readCharUntil(QStringLiteral("{("))) return nullptr; return new Comment(EncoderLaTeX::instance().decode(readBracketString())); } Comment *FileImporterBibTeX::readPlainCommentElement(const QString &prefix) { QString result = EncoderLaTeX::instance().decode(prefix + readLine()); while (m_nextChar == QLatin1Char('\n') || m_nextChar == QLatin1Char('\r')) readChar(); while (!m_nextChar.isNull() && m_nextChar != QLatin1Char('@')) { const QChar nextChar = m_nextChar; const QString line = readLine(); while (m_nextChar == QLatin1Char('\n') || m_nextChar == QLatin1Char('\r')) readChar(); result.append(EncoderLaTeX::instance().decode((nextChar == QLatin1Char('%') ? 
QString() : QString(nextChar)) + line)); } if (result.startsWith(QStringLiteral("x-kbibtex"))) { qCWarning(LOG_KBIBTEX_IO) << "Plain comment element starts with 'x-kbibtex', this should not happen"; emit message(SeverityWarning, QStringLiteral("Plain comment element starts with 'x-kbibtex', this should not happen")); /// ignore special comments return nullptr; } return new Comment(result); } Macro *FileImporterBibTeX::readMacroElement() { Token token = nextToken(); while (token != tBracketOpen) { if (token == tEOF) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing macro near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Opening curly brace '{' expected"; emit message(SeverityError, QString(QStringLiteral("Error in parsing macro near line %1: Opening curly brace '{' expected")).arg(m_lineNo)); return nullptr; } token = nextToken(); } QString key = readSimpleString(); if (key.isEmpty()) { /// Cope with empty keys, /// duplicates are handled further below key = QStringLiteral("EmptyId"); } else if (!Encoder::containsOnlyAscii(key)) { /// Try to avoid non-ascii characters in ids const QString newKey = Encoder::instance().convertToPlainAscii(key); qCWarning(LOG_KBIBTEX_IO) << "Macro key" << key << "near line" << m_lineNo << "contains non-ASCII characters, converted to" << newKey; emit message(SeverityWarning, QString(QStringLiteral("Macro key '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(key).arg(m_lineNo).arg(newKey)); key = newKey; } /// Check for duplicate entry ids, avoid collisions if (m_knownElementIds.contains(key)) { static const QString newIdPattern = QStringLiteral("%1-%2"); int idx = 2; QString newKey = newIdPattern.arg(key).arg(idx); while (m_knownElementIds.contains(newKey)) newKey = newIdPattern.arg(key).arg(++idx); qCDebug(LOG_KBIBTEX_IO) << "Duplicate macro key" << key << ", using replacement key" << newKey; emit message(SeverityWarning, QString(QStringLiteral("Duplicate macro key '%1', using 
replacement key '%2'")).arg(key, newKey)); key = newKey; } m_knownElementIds.insert(key); if (nextToken() != tAssign) { qCCritical(LOG_KBIBTEX_IO) << "Error in parsing macro" << key << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Assign symbol '=' expected"; emit message(SeverityError, QString(QStringLiteral("Error in parsing macro '%1' near line %2: Assign symbol '=' expected")).arg(key).arg(m_lineNo)); return nullptr; } Macro *macro = new Macro(key); do { bool isStringKey = false; QString text = readString(isStringKey); if (text.isNull()) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing macro" << key << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Could not read macro's text"; emit message(SeverityError, QString(QStringLiteral("Error in parsing macro '%1' near line %2: Could not read macro's text")).arg(key).arg(m_lineNo)); delete macro; return nullptr; } text = EncoderLaTeX::instance().decode(bibtexAwareSimplify(text)); if (isStringKey) macro->value().append(QSharedPointer(new MacroKey(text))); else macro->value().append(QSharedPointer(new PlainText(text))); token = nextToken(); } while (token == tDoublecross); return macro; } Preamble *FileImporterBibTeX::readPreambleElement() { Token token = nextToken(); while (token != tBracketOpen) { if (token == tEOF) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing preamble near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Opening curly brace '{' expected"; emit message(SeverityError, QString(QStringLiteral("Error in parsing preamble near line %1: Opening curly brace '{' expected")).arg(m_lineNo)); return nullptr; } token = nextToken(); } Preamble *preamble = new Preamble(); do { bool isStringKey = false; QString text = readString(isStringKey); if (text.isNull()) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing preamble near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Could not read preamble's text"; emit 
message(SeverityError, QString(QStringLiteral("Error in parsing preamble near line %1: Could not read preamble's text")).arg(m_lineNo)); delete preamble; return nullptr; } /// Remember: strings from preamble do not get encoded, /// may contain raw LaTeX commands and code text = bibtexAwareSimplify(text); if (isStringKey) preamble->value().append(QSharedPointer<MacroKey>(new MacroKey(text))); else preamble->value().append(QSharedPointer<PlainText>(new PlainText(text))); token = nextToken(); } while (token == tDoublecross); return preamble; } Entry *FileImporterBibTeX::readEntryElement(const QString &typeString) { Token token = nextToken(); while (token != tBracketOpen) { if (token == tEOF) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Opening curly brace '{' expected"; emit message(SeverityError, QString(QStringLiteral("Error in parsing entry near line %1: Opening curly brace '{' expected")).arg(m_lineNo)); return nullptr; } token = nextToken(); } QString id = readSimpleString(QStringLiteral(",}"), true).trimmed(); if (id.isEmpty()) { if (m_nextChar == QLatin1Char(',') || m_nextChar == QLatin1Char('}')) { /// Cope with empty ids, /// duplicates are handled further below id = QStringLiteral("EmptyId"); } else { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry near line" << m_lineNo << ":" << m_prevLine << endl << m_currentLine << "): Could not read entry id"; emit message(SeverityError, QString(QStringLiteral("Error in parsing preambentryle near line %1: Could not read entry id")).arg(m_lineNo)); return nullptr; } } else { if (id.contains(QStringLiteral("\\")) || id.contains(QStringLiteral("{"))) { const QString newId = EncoderLaTeX::instance().decode(id); qCWarning(LOG_KBIBTEX_IO) << "Entry id" << id << "near line" << m_lineNo << "contains backslashes or curly brackets, converted to" << newId; emit message(SeverityWarning, QString(QStringLiteral("Entry id '%1' near line %2 
contains backslashes or curly brackets, converted to '%3'")).arg(id).arg(m_lineNo).arg(newId)); id = newId; } if (!Encoder::containsOnlyAscii(id)) { /// Try to avoid non-ascii characters in ids const QString newId = Encoder::instance().convertToPlainAscii(id); qCWarning(LOG_KBIBTEX_IO) << "Entry id" << id << "near line" << m_lineNo << "contains non-ASCII characters, converted to" << newId; emit message(SeverityWarning, QString(QStringLiteral("Entry id '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(id).arg(m_lineNo).arg(newId)); id = newId; } } static const QVector<QChar> invalidIdCharacters = {QLatin1Char('{'), QLatin1Char('}'), QLatin1Char(',')}; for (const QChar &invalidIdCharacter : invalidIdCharacters) if (id.contains(invalidIdCharacter)) { qCWarning(LOG_KBIBTEX_IO) << "Entry id" << id << "near line" << m_lineNo << "contains invalid character" << invalidIdCharacter; emit message(SeverityError, QString(QStringLiteral("Entry id '%1' near line %2 contains invalid character '%3'")).arg(id).arg(m_lineNo).arg(invalidIdCharacter)); return nullptr; } /// Check for duplicate entry ids, avoid collisions if (m_knownElementIds.contains(id)) { static const QString newIdPattern = QStringLiteral("%1-%2"); int idx = 2; QString newId = newIdPattern.arg(id).arg(idx); while (m_knownElementIds.contains(newId)) newId = newIdPattern.arg(id).arg(++idx); qCDebug(LOG_KBIBTEX_IO) << "Duplicate id" << id << "near line" << m_lineNo << ", using replacement id" << newId; emit message(SeverityInfo, QString(QStringLiteral("Duplicate id '%1' near line %2, using replacement id '%3'")).arg(id).arg(m_lineNo).arg(newId)); id = newId; } m_knownElementIds.insert(id); Entry *entry = new Entry(BibTeXEntries::instance().format(typeString, m_keywordCasing), id); token = nextToken(); do { if (token == tBracketClose) break; else if (token == tEOF) { qCWarning(LOG_KBIBTEX_IO) << "Unexpected end of data in entry" << id << "near line" << m_lineNo << ":" << m_prevLine << endl << 
m_currentLine; emit message(SeverityError, QString(QStringLiteral("Unexpected end of data in entry '%1' near line %2")).arg(id).arg(m_lineNo)); delete entry; return nullptr; } else if (token != tComma) { if (m_nextChar.isLetter()) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Comma symbol ',' expected but got character" << m_nextChar << "(token" << tokenidToString(token) << ")"; emit message(SeverityError, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character '%3' (token %4)")).arg(id).arg(m_lineNo).arg(m_nextChar).arg(tokenidToString(token))); } else if (m_nextChar.isPrint()) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Comma symbol ',' expected but got character" << m_nextChar << "(" << QString(QStringLiteral("0x%1")).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')) << ", token" << tokenidToString(token) << ")"; emit message(SeverityError, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character '%3' (0x%4, token %5)")).arg(id).arg(m_lineNo).arg(m_nextChar).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(tokenidToString(token))); } else { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Comma symbol (,) expected but got character" << QString(QStringLiteral("0x%1")).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')) << "(token" << tokenidToString(token) << ")"; emit message(SeverityError, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Comma symbol ',' expected but got character 0x%3 (token %4)")).arg(id).arg(m_lineNo).arg(m_nextChar.unicode(), 4, 16, QLatin1Char('0')).arg(tokenidToString(token))); } delete entry; return nullptr; } QString keyName = 
BibTeXFields::instance().format(readSimpleString(), m_keywordCasing); if (keyName.isEmpty()) { token = nextToken(); if (token == tBracketClose) { /// Most often it is the case that the previous line ended with a comma, /// implying that this entry continues, but instead it gets closed by /// a closing curly bracket. qCDebug(LOG_KBIBTEX_IO) << "Issue while parsing entry" << id << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Last key-value pair ended with a non-conformant comma, ignoring that"; emit message(SeverityInfo, QString(QStringLiteral("Issue while parsing entry '%1' near line %2: Last key-value pair ended with a non-conformant comma, ignoring that")).arg(id).arg(m_lineNo)); break; } else { /// Something looks terribly wrong qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Closing curly bracket expected, but found" << tokenidToString(token); emit message(SeverityError, QString(QStringLiteral("Error in parsing entry '%1' near line %2: Closing curly bracket expected, but found %3")).arg(id).arg(m_lineNo).arg(tokenidToString(token))); delete entry; return nullptr; } } /// Try to avoid non-ascii characters in keys const QString newkeyName = Encoder::instance().convertToPlainAscii(keyName); if (newkeyName != keyName) { qCWarning(LOG_KBIBTEX_IO) << "Field name " << keyName << "near line" << m_lineNo << "contains non-ASCII characters, converted to" << newkeyName; emit message(SeverityWarning, QString(QStringLiteral("Field name '%1' near line %2 contains non-ASCII characters, converted to '%3'")).arg(keyName).arg(m_lineNo).arg(newkeyName)); keyName = newkeyName; } token = nextToken(); if (token != tAssign) { qCWarning(LOG_KBIBTEX_IO) << "Error in parsing entry" << id << ", field name" << keyName << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "): Assign symbol '=' expected after field name"; emit message(SeverityError, 
QString(QStringLiteral("Error in parsing entry '%1', field name '%2' near line %3: Assign symbol '=' expected after field name")).arg(id, keyName).arg(m_lineNo)); delete entry; return nullptr; } Value value; /// check for duplicate fields if (entry->contains(keyName)) { if (keyName.toLower() == Entry::ftKeywords || keyName.toLower() == Entry::ftUrl) { /// Special handling of keywords and URLs: instead of using fallback names /// like "keywords2", "keywords3", ..., append new keywords to /// already existing keyword value value = entry->value(keyName); } else if (m_keysForPersonDetection.contains(keyName.toLower())) { /// Special handling of authors and editors: instead of using fallback names /// like "author2", "author3", ..., append new authors to /// already existing author value value = entry->value(keyName); } else { int i = 2; QString appendix = QString::number(i); while (entry->contains(keyName + appendix)) { ++i; appendix = QString::number(i); } qCDebug(LOG_KBIBTEX_IO) << "Entry" << id << "already contains a key" << keyName << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << "), using" << (keyName + appendix); emit message(SeverityWarning, QString(QStringLiteral("Entry '%1' already contains a key '%2' near line %4, using '%3'")).arg(id, keyName, keyName + appendix).arg(m_lineNo)); keyName += appendix; } } token = readValue(value, keyName); if (token != tBracketClose && token != tComma) { qCWarning(LOG_KBIBTEX_IO) << "Failed to read value in entry" << id << ", field name" << keyName << "near line" << m_lineNo << "(" << m_prevLine << endl << m_currentLine << ")"; emit message(SeverityError, QString(QStringLiteral("Failed to read value in entry '%1', field name '%2' near line %3")).arg(id, keyName).arg(m_lineNo)); delete entry; return nullptr; } entry->insert(keyName, value); } while (true); return entry; } FileImporterBibTeX::Token FileImporterBibTeX::nextToken() { if (!skipWhiteChar()) { /// Some error occurred while reading from data 
stream return tEOF; } Token result = tUnknown; switch (m_nextChar.toLatin1()) { case '@': result = tAt; break; case '{': case '(': result = tBracketOpen; break; case '}': case ')': result = tBracketClose; break; case ',': result = tComma; break; case '=': result = tAssign; break; case '#': result = tDoublecross; break; default: if (m_textStream->atEnd()) result = tEOF; } if (m_nextChar != QLatin1Char('%')) { /// Unclean solution, but necessary for comments /// that have a percent sign as a prefix readChar(); } return result; } QString FileImporterBibTeX::readString(bool &isStringKey) { /// Most often it is not a string key isStringKey = false; if (!skipWhiteChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } switch (m_nextChar.toLatin1()) { case '{': case '(': { ++m_statistics.countCurlyBrackets; const QString result = readBracketString(); return result; } case '"': { ++m_statistics.countQuotationMarks; const QString result = readQuotedString(); return result; } default: isStringKey = true; const QString result = readSimpleString(); return result; } } QString FileImporterBibTeX::readSimpleString(const QString &until, const bool readNestedCurlyBrackets) { static const QString extraAlphaNumChars = QString(QStringLiteral("?'`-_:.+/$\\\"&")); QString result; ///< 'result' is Null on purpose: simple strings cannot be empty in contrast to e.g. 
quoted strings if (!skipWhiteChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } QChar prevChar = QChar(0x00); while (!m_nextChar.isNull()) { if (readNestedCurlyBrackets && m_nextChar == QLatin1Char('{') && prevChar != QLatin1Char('\\')) { int depth = 1; while (depth > 0) { result.append(m_nextChar); prevChar = m_nextChar; if (!readChar()) return result; if (m_nextChar == QLatin1Char('{') && prevChar != QLatin1Char('\\')) ++depth; else if (m_nextChar == QLatin1Char('}') && prevChar != QLatin1Char('\\')) --depth; } result.append(m_nextChar); prevChar = m_nextChar; if (!readChar()) return result; } const ushort nextCharUnicode = m_nextChar.unicode(); if (!until.isEmpty()) { /// Variable "until" has user-defined value if (m_nextChar == QLatin1Char('\n') || m_nextChar == QLatin1Char('\r') || until.contains(m_nextChar)) { /// Force break on line-breaks or if one of the "until" chars has been read break; } else { /// Append read character to final result result.append(m_nextChar); } } else if ((nextCharUnicode >= static_cast<ushort>('a') && nextCharUnicode <= static_cast<ushort>('z')) || (nextCharUnicode >= static_cast<ushort>('A') && nextCharUnicode <= static_cast<ushort>('Z')) || (nextCharUnicode >= static_cast<ushort>('0') && nextCharUnicode <= static_cast<ushort>('9')) || extraAlphaNumChars.contains(m_nextChar)) { /// Accept default set of alpha-numeric characters result.append(m_nextChar); } else break; prevChar = m_nextChar; if (!readChar()) break; } return result; } QString FileImporterBibTeX::readQuotedString() { QString result(0, QChar()); ///< Construct an empty but non-null string Q_ASSERT_X(m_nextChar == QLatin1Char('"'), "QString FileImporterBibTeX::readQuotedString()", "m_nextChar is not '\"'"); if (!readChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } while (!m_nextChar.isNull()) { if (m_nextChar == QLatin1Char('"') && m_prevChar != 
QLatin1Char('\\') && m_prevChar != QLatin1Char('{')) break; else result.append(m_nextChar); if (!readChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } } if (!readChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } /// Remove protection around quotation marks result.replace(QStringLiteral("{\"}"), QStringLiteral("\"")); return result; } QString FileImporterBibTeX::readBracketString() { static const QChar backslash = QLatin1Char('\\'); QString result(0, QChar()); ///< Construct an empty but non-null string const QChar openingBracket = m_nextChar; const QChar closingBracket = openingBracket == QLatin1Char('{') ? QLatin1Char('}') : (openingBracket == QLatin1Char('(') ? QLatin1Char(')') : QChar()); Q_ASSERT_X(!closingBracket.isNull(), "QString FileImporterBibTeX::readBracketString()", "openingBracket==m_nextChar is neither '{' nor '('"); int counter = 1; if (!readChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } while (!m_nextChar.isNull()) { if (m_nextChar == openingBracket && m_prevChar != backslash) ++counter; else if (m_nextChar == closingBracket && m_prevChar != backslash) --counter; if (counter == 0) { break; } else result.append(m_nextChar); if (!readChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } } if (!readChar()) { /// Some error occurred while reading from data stream return QString(); ///< return null QString } return result; } FileImporterBibTeX::Token FileImporterBibTeX::readValue(Value &value, const QString &key) { Token token = tUnknown; const QString iKey = key.toLower(); static const QSet<QString> verbatimKeys {Entry::ftColor.toLower(), Entry::ftCrossRef.toLower(), Entry::ftXData.toLower()}; do { bool isStringKey = false; const QString rawText = readString(isStringKey); if (rawText.isNull()) return tEOF; QString text 
= EncoderLaTeX::instance().decode(rawText); /// for all entries except for abstracts ... if (iKey != Entry::ftAbstract && !(iKey.startsWith(Entry::ftUrl) && !iKey.startsWith(Entry::ftUrlDate)) && !iKey.startsWith(Entry::ftLocalFile) && !iKey.startsWith(Entry::ftFile)) { /// ... remove redundant spaces including newlines text = bibtexAwareSimplify(text); } /// abstracts will keep their formatting (regarding line breaks) /// as requested by Thomas Jensch via mail (20 October 2010) /// Maintain statistics on if (book) titles are protected /// by surrounding curly brackets if (iKey == Entry::ftTitle || iKey == Entry::ftBookTitle) { if (text[0] == QLatin1Char('{') && text[text.length() - 1] == QLatin1Char('}')) ++m_statistics.countProtectedTitle; else ++m_statistics.countUnprotectedTitle; } if (m_keysForPersonDetection.contains(iKey)) { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else { CommaContainment comma = ccContainsComma; parsePersonList(text, value, &comma, m_lineNo, this); /// Update statistics on name formatting if (comma == ccContainsComma) ++m_statistics.countLastNameFirst; else ++m_statistics.countFirstNameFirst; } } else if (iKey == Entry::ftPages) { static const QRegularExpression rangeInAscii(QStringLiteral("\\s*--?\\s*")); text.replace(rangeInAscii, QChar(0x2013)); if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else value.append(QSharedPointer<PlainText>(new PlainText(text))); } else if ((iKey.startsWith(Entry::ftUrl) && !iKey.startsWith(Entry::ftUrlDate)) || iKey.startsWith(Entry::ftLocalFile) || iKey.startsWith(Entry::ftFile) || iKey == QStringLiteral("ee") || iKey == QStringLiteral("biburl")) { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else { /// Assumption: in fields like Url or LocalFile, file names are separated by ; static const QRegularExpression semicolonSpace = QRegularExpression(QStringLiteral("[;]\\s*")); const QStringList fileList = 
rawText.split(semicolonSpace, QString::SkipEmptyParts); for (const QString &filename : fileList) { value.append(QSharedPointer<VerbatimText>(new VerbatimText(filename))); } } } else if (iKey.startsWith(Entry::ftFile)) { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else { /// Assumption: this field was written by Mendeley, which uses /// a very strange format for file names: /// :C$\backslash$:/Users/BarisEvrim/Documents/Mendeley Desktop/GeversPAMI10.pdf:pdf /// :: /// :Users/Fred/Library/Application Support/Mendeley Desktop/Downloaded/Hasselman et al. - 2011 - (Still) Growing Up What should we be a realist about in the cognitive and behavioural sciences Abstract.pdf:pdf const QRegularExpressionMatch match = KBibTeX::mendeleyFileRegExp.match(rawText); if (match.hasMatch()) { static const QString backslashLaTeX = QStringLiteral("$\\backslash$"); QString filename = match.captured(1).remove(backslashLaTeX); if (filename.startsWith(QStringLiteral("home/")) || filename.startsWith(QStringLiteral("Users/"))) { /// Mendeley doesn't have a slash at the beginning of absolute paths, /// so, insert one /// See bug 19833, comment 5: https://gna.org/bugs/index.php?19833#comment5 filename.prepend(QLatin1Char('/')); } value.append(QSharedPointer<VerbatimText>(new VerbatimText(filename))); } else value.append(QSharedPointer<VerbatimText>(new VerbatimText(text))); } } else if (iKey == Entry::ftMonth) { if (isStringKey) { static const QRegularExpression monthThreeChars(QStringLiteral("^[a-z]{3}"), QRegularExpression::CaseInsensitiveOption); if (monthThreeChars.match(text).hasMatch()) text = text.left(3).toLower(); value.append(QSharedPointer<MacroKey>(new MacroKey(text))); } else value.append(QSharedPointer<PlainText>(new PlainText(text))); } else if (iKey.startsWith(Entry::ftDOI)) { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else { /// Take care of "; " which separates multiple DOIs, but which may baffle the regexp 
QString preprocessedText = rawText; preprocessedText.replace(QStringLiteral("; "), QStringLiteral(" ")); /// Extract everything that looks like a DOI using a regular expression, /// ignore everything else QRegularExpressionMatchIterator doiRegExpMatchIt = KBibTeX::doiRegExp.globalMatch(preprocessedText); while (doiRegExpMatchIt.hasNext()) { const QRegularExpressionMatch doiRegExpMatch = doiRegExpMatchIt.next(); value.append(QSharedPointer<VerbatimText>(new VerbatimText(doiRegExpMatch.captured(0)))); } } } else if (iKey == Entry::ftKeywords) { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else { char splitChar; const QList<QSharedPointer<Keyword> > keywords = splitKeywords(text, &splitChar); for (const auto &keyword : keywords) value.append(keyword); /// Memorize (some) split characters for later use /// (e.g. when writing file again) if (splitChar == ';') m_statistics.mostRecentListSeparator = QStringLiteral("; "); else if (splitChar == ',') m_statistics.mostRecentListSeparator = QStringLiteral(", "); } } else if (verbatimKeys.contains(iKey)) { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else value.append(QSharedPointer<VerbatimText>(new VerbatimText(rawText))); } else { if (isStringKey) value.append(QSharedPointer<MacroKey>(new MacroKey(text))); else value.append(QSharedPointer<PlainText>(new PlainText(text))); } token = nextToken(); } while (token == tDoublecross); return token; } bool FileImporterBibTeX::readChar() { /// Memorize previous char m_prevChar = m_nextChar; if (m_textStream->atEnd()) { /// At end of data stream m_nextChar = QChar::Null; return false; } /// Read next char *m_textStream >> m_nextChar; /// Test for new line if (m_nextChar == QLatin1Char('\n')) { /// Update variables tracking line numbers and line content ++m_lineNo; m_prevLine = m_currentLine; m_currentLine.clear(); } else { /// Add read char to current line m_currentLine.append(m_nextChar); } return true; } bool 
FileImporterBibTeX::readCharUntil(const QString &until) { Q_ASSERT_X(!until.isEmpty(), "bool FileImporterBibTeX::readCharUntil(const QString &until)", "\"until\" is empty or invalid"); bool result = true; while (!until.contains(m_nextChar) && (result = readChar())); return result; } bool FileImporterBibTeX::skipWhiteChar() { bool result = true; while ((m_nextChar.isSpace() || m_nextChar == QLatin1Char('\t') || m_nextChar == QLatin1Char('\n') || m_nextChar == QLatin1Char('\r')) && result) result = readChar(); return result; } QString FileImporterBibTeX::readLine() { QString result; while (m_nextChar != QLatin1Char('\n') && m_nextChar != QLatin1Char('\r') && readChar()) result.append(m_nextChar); return result; } QList<QSharedPointer<Keyword> > FileImporterBibTeX::splitKeywords(const QString &text, char *usedSplitChar) { QList<QSharedPointer<Keyword> > result; static const QHash<char, QRegularExpression> splitAlong = { {'\n', QRegularExpression(QStringLiteral("\\s*\n\\s*"))}, {';', QRegularExpression(QStringLiteral("\\s*;\\s*"))}, {',', QRegularExpression(QString("\\s*,\\s*"))} }; if (usedSplitChar != nullptr) *usedSplitChar = '\0'; for (auto it = splitAlong.constBegin(); it != splitAlong.constEnd(); ++it) { /// check if character is contained in text (should be cheap to test) if (text.contains(QLatin1Char(it.key()))) { /// split text along a pattern like spaces-splitchar-spaces /// extract keywords static const QRegularExpression unneccessarySpacing(QStringLiteral("[ \n\r\t]+")); const QStringList keywords = text.split(it.value(), QString::SkipEmptyParts).replaceInStrings(unneccessarySpacing, QStringLiteral(" ")); /// build QList of Keyword objects from keywords for (const QString &keyword : keywords) { result.append(QSharedPointer<Keyword>(new Keyword(keyword))); } /// Memorize (some) split characters for later use /// (e.g. 
when writing file again) if (usedSplitChar != nullptr) *usedSplitChar = it.key(); /// no more splits necessary break; } } /// no split was performed, so whole text must be a single keyword if (result.isEmpty()) result.append(QSharedPointer<Keyword>(new Keyword(text))); return result; } QList<QSharedPointer<Person> > FileImporterBibTeX::splitNames(const QString &text, const int line_number, QObject *parent) { /// Case: Smith, John and Johnson, Tim /// Case: Smith, John and Fulkerson, Ford and Johnson, Tim /// Case: Smith, John, Fulkerson, Ford, and Johnson, Tim /// Case: John Smith and Tim Johnson /// Case: John Smith and Ford Fulkerson and Tim Johnson /// Case: Smith, John, Johnson, Tim /// Case: Smith, John, Fulkerson, Ford, Johnson, Tim /// Case: John Smith, Tim Johnson /// Case: John Smith, Tim Johnson, Ford Fulkerson /// Case: Smith, John ; Johnson, Tim ; Fulkerson, Ford (IEEE Xplore) /// German case: Robert A. Gehring und Bernd Lutterbeck QString internalText = text; /// Remove invalid characters such as dots or (double) daggers for footnotes static const QList<QChar> invalidChars {QChar(0x00b7), QChar(0x2020), QChar(0x2217), QChar(0x2021), QChar(0x002a), QChar(0x21d1) /** Upwards double arrow */}; for (const auto &invalidChar : invalidChars) /// Replacing daggers with commas ensures that they act as persons' names separator internalText = internalText.replace(invalidChar, QChar(',')); /// Remove numbers to footnotes static const QRegularExpression numberFootnoteRegExp(QStringLiteral("(\\w)\\d+\\b")); internalText = internalText.replace(numberFootnoteRegExp, QStringLiteral("\\1")); /// Remove academic degrees static const QRegularExpression academicDegreesRegExp(QStringLiteral("(,\\s*)?(MA|PhD)\\b")); internalText = internalText.remove(academicDegreesRegExp); /// Remove email addresses static const QRegularExpression emailAddressRegExp(QStringLiteral("\\b[a-zA-Z0-9][a-zA-Z0-9._-]+[a-zA-Z0-9]@[a-z0-9][a-z0-9-]*([.][a-z0-9-]+)*([.][a-z]+)+\\b")); internalText = 
internalText.remove(emailAddressRegExp); /// Split input string into tokens which are either name components (first or last name) /// or full names (composed of first and last name), depending on the input string's structure static const QRegularExpression split(QStringLiteral("\\s*([,]+|[,]*\\b[au]nd\\b|[;]|&|\\n|\\s{4,})\\s*")); const QStringList authorTokenList = internalText.split(split, QString::SkipEmptyParts); bool containsSpace = true; for (QStringList::ConstIterator it = authorTokenList.constBegin(); containsSpace && it != authorTokenList.constEnd(); ++it) containsSpace = (*it).contains(QChar(' ')); QList<QSharedPointer<Person> > result; result.reserve(authorTokenList.size()); if (containsSpace) { /// Tokens look like "John Smith" for (const QString &authorToken : authorTokenList) { QSharedPointer<Person> person = personFromString(authorToken, nullptr, line_number, parent); if (!person.isNull()) result.append(person); } } else { /// Tokens look like "Smith" or "John" /// Assumption: two consecutive tokens form a name for (QStringList::ConstIterator it = authorTokenList.constBegin(); it != authorTokenList.constEnd(); ++it) { QString lastname = *it; ++it; if (it != authorTokenList.constEnd()) { lastname += QStringLiteral(", ") + (*it); QSharedPointer<Person> person = personFromString(lastname, nullptr, line_number, parent); if (!person.isNull()) result.append(person); } else break; } } return result; } void FileImporterBibTeX::parsePersonList(const QString &text, Value &value, const int line_number, QObject *parent) { parsePersonList(text, value, nullptr, line_number, parent); } void FileImporterBibTeX::parsePersonList(const QString &text, Value &value, CommaContainment *comma, const int line_number, QObject *parent) { static const QString tokenAnd = QStringLiteral("and"); static const QString tokenOthers = QStringLiteral("others"); static QStringList tokens; contextSensitiveSplit(text, tokens); if (tokens.count() > 0) { if (tokens[0] == tokenAnd) { 
qCInfo(LOG_KBIBTEX_IO) << "Person list starts with" << tokenAnd << "near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Person list starts with 'and' near line %1")).arg(line_number))); } else if (tokens.count() > 1 && tokens[tokens.count() - 1] == tokenAnd) { qCInfo(LOG_KBIBTEX_IO) << "Person list ends with" << tokenAnd << "near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'and' near line %1")).arg(line_number))); } if (tokens[0] == tokenOthers) { qCInfo(LOG_KBIBTEX_IO) << "Person list starts with" << tokenOthers << "near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Person list starts with 'others' near line %1")).arg(line_number))); } else if (tokens[tokens.count() - 1] == tokenOthers && (tokens.count() < 3 || tokens[tokens.count() - 2] != tokenAnd)) { - qCInfo(LOG_KBIBTEX_IO) << "Person list ends with" << tokenOthers << "but is not preceeded with name and" << tokenAnd << "near line" << line_number; + qCInfo(LOG_KBIBTEX_IO) << "Person list ends with" << tokenOthers << "but is not preceded with name and" << tokenAnd << "near line" << line_number; if (parent != nullptr) - QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'others' but is not preceeded with name and 'and' near line %1")).arg(line_number))); + 
QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Person list ends with 'others' but is not preceded with name and 'and' near line %1")).arg(line_number))); } } int nameStart = 0; QString prevToken; for (int i = 0; i < tokens.count(); ++i) { if (tokens[i] == tokenAnd) { if (prevToken == tokenAnd) { qCInfo(LOG_KBIBTEX_IO) << "Two subsequent" << tokenAnd << "found in person list near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Two subsequent 'and' found in person list near line %1")).arg(line_number))); } else if (nameStart < i) { const QSharedPointer<Person> person = personFromTokenList(tokens.mid(nameStart, i - nameStart), comma, line_number, parent); if (!person.isNull()) value.append(person); else { qCInfo(LOG_KBIBTEX_IO) << "Text" << tokens.mid(nameStart, i - nameStart).join(' ') << "does not form a name near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Text '%1' does not form a name near line %2")).arg(tokens.mid(nameStart, i - nameStart).join(' ')).arg(line_number))); } } else { qCInfo(LOG_KBIBTEX_IO) << "Found" << tokenAnd << "but no name before it near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Found 'and' but no name before it near line %1")).arg(line_number))); } nameStart = i + 1; } else if (tokens[i] == tokenOthers) { if (i < tokens.count() - 1) { 
qCInfo(LOG_KBIBTEX_IO) << "Special word" << tokenOthers << "found before last position in person name near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Special word 'others' found before last position in person name near line %1")).arg(line_number))); } else value.append(QSharedPointer<PlainText>(new PlainText(QStringLiteral("others")))); nameStart = tokens.count() + 1; } prevToken = tokens[i]; } if (nameStart < tokens.count()) { const QSharedPointer<Person> person = personFromTokenList(tokens.mid(nameStart), comma, line_number, parent); if (!person.isNull()) value.append(person); else { qCInfo(LOG_KBIBTEX_IO) << "Text" << tokens.mid(nameStart).join(' ') << "does not form a name near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Text '%1' does not form a name near line %2")).arg(tokens.mid(nameStart).join(' ')).arg(line_number))); } } } QSharedPointer<Person> FileImporterBibTeX::personFromString(const QString &name, const int line_number, QObject *parent) { // TODO Merge with FileImporter::splitName return personFromString(name, nullptr, line_number, parent); } QSharedPointer<Person> FileImporterBibTeX::personFromString(const QString &name, CommaContainment *comma, const int line_number, QObject *parent) { // TODO Merge with FileImporter::splitName and FileImporterBibTeX::contextSensitiveSplit static QStringList tokens; contextSensitiveSplit(name, tokens); return personFromTokenList(tokens, comma, line_number, parent); } QSharedPointer<Person> FileImporterBibTeX::personFromTokenList(const QStringList &tokens, CommaContainment *comma, const int line_number, QObject *parent) { if (comma != nullptr) 
*comma = ccNoComma; /// Simple case: provided list of tokens is empty, return invalid Person if (tokens.isEmpty()) return QSharedPointer<Person>(); /** * Sequence of tokens may contain somewhere a comma, like * "Tuckwell," "Peter". In this case, fill two string lists: * one with tokens before the comma, one with tokens after the * comma (excluding the comma itself). Example: * partA = ( "Tuckwell" ); partB = ( "Peter" ); partC = ( "Jr." ) * If a comma was found, boolean variable gotComma is set. */ QStringList partA, partB, partC; int commaCount = 0; for (const QString &token : tokens) { /// Position where comma was found, or -1 if no comma in token int p = -1; if (commaCount < 2) { /// Only check if token contains comma /// if no comma was found before int bracketCounter = 0; for (int i = 0; i < token.length(); ++i) { /// Consider opening curly brackets if (token[i] == QChar('{')) ++bracketCounter; /// Consider closing curly brackets else if (token[i] == QChar('}')) --bracketCounter; /// Only if outside any open curly bracket environments /// consider comma characters else if (bracketCounter == 0 && token[i] == QChar(',')) { /// Memorize comma's position and break from loop p = i; break; } else if (bracketCounter < 0) { /// Should never happen: more closing brackets than opening ones qCWarning(LOG_KBIBTEX_IO) << "Opening and closing brackets do not match near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Opening and closing brackets do not match near line %1")).arg(line_number))); } } } if (p >= 0) { if (commaCount == 0) { if (p > 0) partA.append(token.left(p)); if (p < token.length() - 1) partB.append(token.mid(p + 1)); } else if (commaCount == 1) { if (p > 0) partB.append(token.left(p)); if (p < token.length() - 1) partC.append(token.mid(p + 1)); } ++commaCount; } else if 
(commaCount == 0) partA.append(token); else if (commaCount == 1) partB.append(token); else if (commaCount == 2) partC.append(token); } if (commaCount > 0) { if (comma != nullptr) *comma = ccContainsComma; return QSharedPointer<Person>(new Person(partC.isEmpty() ? partB.join(QChar(' ')) : partC.join(QChar(' ')), partA.join(QChar(' ')), partC.isEmpty() ? QString() : partB.join(QChar(' ')))); } /** * PubMed uses a special writing style for names, where the * last name is followed by single capital letters, each being * the first letter of each first name. Example: Tuckwell P H * So, check how many single capital letters are at the end of * the given token list */ partA.clear(); partB.clear(); bool singleCapitalLetters = true; QStringList::ConstIterator it = tokens.constEnd(); while (it != tokens.constBegin()) { --it; if (singleCapitalLetters && it->length() == 1 && it->at(0).isUpper()) partB.prepend(*it); else { singleCapitalLetters = false; partA.prepend(*it); } } if (!partB.isEmpty()) { /// Name was actually given in PubMed format return QSharedPointer<Person>(new Person(partB.join(QChar(' ')), partA.join(QChar(' ')))); } /** * Normally, the last upper case token in a name is the last name * (last names consisting of multiple space-separated parts *have* * to be protected by {...}), but some languages have fill words * in lower case belonging to the last name as well (example: "van"). * In addition, some languages have capital case letters as well * (example: "Di Cosmo"). * Exception: Special keywords such as "Jr." can be appended to the * name, not counted as part of the last name. 
*/ partA.clear(); partB.clear(); partC.clear(); static const QSet<QString> capitalCaseLastNameFragments {QStringLiteral("Di")}; it = tokens.constEnd(); while (it != tokens.constBegin()) { --it; if (partB.isEmpty() && (it->toLower().startsWith(QStringLiteral("jr")) || it->toLower().startsWith(QStringLiteral("sr")) || it->toLower().startsWith(QStringLiteral("iii")))) /// handle name suffices like "Jr" or "III." partC.prepend(*it); else if (partB.isEmpty() || it->at(0).isLower() || capitalCaseLastNameFragments.contains(*it)) partB.prepend(*it); else partA.prepend(*it); } if (!partB.isEmpty()) { /// Name was actually like "Peter Ole van der Tuckwell", /// split into "Peter Ole" and "van der Tuckwell" return QSharedPointer<Person>(new Person(partA.join(QChar(' ')), partB.join(QChar(' ')), partC.isEmpty() ? QString() : partC.join(QChar(' ')))); } qCWarning(LOG_KBIBTEX_IO) << "Don't know how to handle name" << tokens.join(QLatin1Char(' ')) << "near line" << line_number; if (parent != nullptr) QMetaObject::invokeMethod(parent, "message", Qt::DirectConnection, QGenericReturnArgument(), Q_ARG(FileImporter::MessageSeverity, SeverityWarning), Q_ARG(QString, QString(QStringLiteral("Don't know how to handle name '%1' near line %2")).arg(tokens.join(QLatin1Char(' '))).arg(line_number))); return QSharedPointer<Person>(); } void FileImporterBibTeX::contextSensitiveSplit(const QString &text, QStringList &segments) { // TODO Merge with FileImporter::splitName and FileImporterBibTeX::personFromString int bracketCounter = 0; ///< keep track of opening and closing brackets: {...} QString buffer; int len = text.length(); segments.clear(); ///< empty list for results before proceeding for (int pos = 0; pos < len; ++pos) { if (text[pos] == '{') ++bracketCounter; else if (text[pos] == '}') --bracketCounter; if (text[pos].isSpace() && bracketCounter == 0) { if (!buffer.isEmpty()) { segments.append(buffer); buffer.clear(); } } else buffer.append(text[pos]); } if (!buffer.isEmpty()) 
segments.append(buffer); } QString FileImporterBibTeX::bibtexAwareSimplify(const QString &text) { QString result; int i = 0; /// Consume initial spaces ... while (i < text.length() && text[i].isSpace()) ++i; /// ... but if there have been spaces (i.e. i>0), then record a single space only if (i > 0) result.append(QStringLiteral(" ")); while (i < text.length()) { /// Consume non-spaces while (i < text.length() && !text[i].isSpace()) { result.append(text[i]); ++i; } /// String may end with a non-space if (i >= text.length()) break; /// Consume spaces, ... while (i < text.length() && text[i].isSpace()) ++i; /// ... but record only a single space result.append(QStringLiteral(" ")); } return result; } bool FileImporterBibTeX::evaluateParameterComments(QTextStream *textStream, const QString &line, File *file) { /// Assertion: variable "line" is all lower-case /** check if this file requests a special encoding */ if (line.startsWith(QStringLiteral("@comment{x-kbibtex-encoding=")) && line.endsWith(QLatin1Char('}'))) { const QString encoding = line.mid(28, line.length() - 29).toLower(); textStream->setCodec(encoding.toLower() == QStringLiteral("latex") ? "us-ascii" : encoding.toLatin1()); file->setProperty(File::Encoding, encoding.toLower() == QStringLiteral("latex") ? 
encoding : QString::fromLatin1(textStream->codec()->name())); return true; } else if (line.startsWith(QStringLiteral("@comment{x-kbibtex-personnameformatting=")) && line.endsWith(QLatin1Char('}'))) { // TODO usage of x-kbibtex-personnameformatting is deprecated, // as automatic detection is in place QString personNameFormatting = line.mid(40, line.length() - 41); file->setProperty(File::NameFormatting, personNameFormatting); return true; } else if (line.startsWith(QStringLiteral("% encoding:"))) { /// Interprete JabRef's encoding information QString encoding = line.mid(12); qCDebug(LOG_KBIBTEX_IO) << "Using JabRef's encoding:" << encoding; textStream->setCodec(encoding.toLatin1()); file->setProperty(File::Encoding, QString::fromLatin1(textStream->codec()->name())); return true; } return false; } QString FileImporterBibTeX::tokenidToString(Token token) { switch (token) { case tAt: return QString(QStringLiteral("At")); case tBracketClose: return QString(QStringLiteral("BracketClose")); case tBracketOpen: return QString(QStringLiteral("BracketOpen")); case tAlphaNumText: return QString(QStringLiteral("AlphaNumText")); case tAssign: return QString(QStringLiteral("Assign")); case tComma: return QString(QStringLiteral("Comma")); case tDoublecross: return QString(QStringLiteral("Doublecross")); case tEOF: return QString(QStringLiteral("EOF")); case tUnknown: return QString(QStringLiteral("Unknown")); default: return QString(QStringLiteral("<Unknown>")); } } void FileImporterBibTeX::setCommentHandling(CommentHandling commentHandling) { m_commentHandling = commentHandling; } diff --git a/src/io/fileinfo.cpp b/src/io/fileinfo.cpp index b3fbf9a4..1d3cda23 100644 --- a/src/io/fileinfo.cpp +++ b/src/io/fileinfo.cpp @@ -1,372 +1,372 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the 
terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "fileinfo.h" #include <poppler-qt5.h> #include <QFileInfo> #include <QMimeDatabase> #include <QDir> #include <QTextStream> #include <QStandardPaths> #include <QRegularExpression> #include <QtConcurrentRun> #include <KBibTeX> #include <Entry> #include "logging_io.h" FileInfo::FileInfo() { /// nothing } const QString FileInfo::mimetypeOctetStream = QStringLiteral("application/octet-stream"); const QString FileInfo::mimetypeHTML = QStringLiteral("text/html"); const QString FileInfo::mimetypeBibTeX = QStringLiteral("text/x-bibtex"); const QString FileInfo::mimetypeRIS = QStringLiteral("application/x-research-info-systems"); const QString FileInfo::mimetypePDF = QStringLiteral("application/pdf"); QMimeType FileInfo::mimeTypeForUrl(const QUrl &url) { if (!url.isValid()) { qCWarning(LOG_KBIBTEX_IO) << "Cannot determine mime type for empty or invalid QUrl"; return QMimeType(); ///< invalid input gives invalid mime type } static const QMimeDatabase db; static const QMimeType mtHTML(db.mimeTypeForName(mimetypeHTML)); static const QMimeType mtOctetStream(db.mimeTypeForName(mimetypeOctetStream)); static const QMimeType mtBibTeX(db.mimeTypeForName(mimetypeBibTeX)); static const QMimeType mtPDF(db.mimeTypeForName(mimetypePDF)); static const QMimeType mtRIS(db.mimeTypeForName(mimetypeRIS)); /// Test if mime type for BibTeX is registered 
before determining file extension static const QString mimetypeBibTeXExt = mtBibTeX.preferredSuffix(); /// Test if mime type for RIS is registered before determining file extension static const QString mimetypeRISExt = mtRIS.preferredSuffix(); /// Test if mime type for PDF is registered before determining file extension static const QString mimetypePDFExt = mtPDF.preferredSuffix(); const QString extension = db.suffixForFileName(url.fileName()).toLower(); /// First, check preferred suffixes if (extension == mimetypeBibTeXExt) return mtBibTeX; else if (extension == mimetypeRISExt) return mtRIS; else if (extension == mimetypePDFExt) return mtPDF; /// Second, check any other suffixes else if (mtBibTeX.suffixes().contains(extension)) return mtBibTeX; else if (mtRIS.suffixes().contains(extension)) return mtRIS; else if (mtPDF.suffixes().contains(extension)) return mtPDF; /// Let the KDE subsystem guess the mime type QMimeType result = db.mimeTypeForUrl(url); /// Fall back to application/octet-stream if something goes wrong if (!result.isValid()) result = mtOctetStream; /// In case that KDE could not determine mime type, /// do some educated guesses on our own if (result.name() == mimetypeOctetStream) { if (url.scheme().startsWith(QStringLiteral("http"))) result = mtHTML; // TODO more tests? } return result; } void FileInfo::urlsInText(const QString &text, const TestExistence testExistence, const QString &baseDirectory, QSet<QUrl> &result) { if (text.isEmpty()) return; /// DOI identifiers have to extracted first as KBibTeX::fileListSeparatorRegExp /// contains characters that can be part of a DOI (e.g. ';') and thus could split /// a DOI in between. 
QString internalText = text; int pos = 0; QRegularExpressionMatch doiRegExpMatch; while ((doiRegExpMatch = KBibTeX::doiRegExp.match(internalText, pos)).hasMatch()) { pos = doiRegExpMatch.capturedStart(0); QString doiMatch = doiRegExpMatch.captured(0); const int semicolonHttpPos = doiMatch.indexOf(QStringLiteral(";http")); if (semicolonHttpPos > 0) doiMatch = doiMatch.left(semicolonHttpPos); const QUrl url(KBibTeX::doiUrlPrefix + QString(doiMatch).remove(QStringLiteral("\\"))); if (url.isValid() && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates /// Cut away any URL that may be right before found DOI number: /// For example, if DOI '10.1000/38-abc' was found in /// 'Lore ipsum http://doi.example.org/10.1000/38-abc Lore ipsum' /// also remove 'http://doi.example.org/' from the text, keeping only /// 'Lore ipsum Lore ipsum' static const QRegularExpression genericDoiUrlPrefix(QStringLiteral("http[s]?://[a-z0-9./-]+/$")); ///< looks like an URL const QRegularExpressionMatch genericDoiUrlPrefixMatch = genericDoiUrlPrefix.match(internalText.left(pos)); if (genericDoiUrlPrefixMatch.hasMatch()) /// genericDoiUrlPrefixMatch.captured(0) may contain (parts of) DOI internalText = internalText.left(genericDoiUrlPrefixMatch.capturedStart(0)) + internalText.mid(pos + doiMatch.length()); else internalText = internalText.left(pos) + internalText.mid(pos + doiMatch.length()); } const QStringList fileList = internalText.split(KBibTeX::fileListSeparatorRegExp, QString::SkipEmptyParts); for (const QString &text : fileList) { internalText = text; /// If testing for the actual existence of a filename found in the text ... if (testExistence == TestExistenceYes) { /// If a base directory (e.g. the location of the parent .bib file) is given /// and the potential filename fragment is NOT an absolute path, ... 
if (internalText.startsWith(QStringLiteral("~") + QDir::separator())) { const QString fullFilename = QDir::homePath() + internalText.mid(1); const QFileInfo fileInfo(fullFilename); const QUrl url = QUrl::fromLocalFile(fileInfo.canonicalFilePath()); if (fileInfo.exists() && fileInfo.isFile() && url.isValid() && !result.contains(url)) { result << url; /// Stop searching for URLs or filenames in current internal text continue; } } else if (!baseDirectory.isEmpty() && // TODO the following test assumes that absolute paths start // with a dir separator, which may only be true on Unix/Linux, // but not Windows. May be a test for 'first character is a letter, // second is ":", third is "\"' may be necessary. !internalText.startsWith(QDir::separator())) { /// To get the absolute path, prepend filename fragment with base directory const QString fullFilename = baseDirectory + QDir::separator() + internalText; const QFileInfo fileInfo(fullFilename); const QUrl url = QUrl::fromLocalFile(fileInfo.canonicalFilePath()); if (fileInfo.exists() && fileInfo.isFile() && url.isValid() && !result.contains(url)) { result << url; /// Stop searching for URLs or filenames in current internal text continue; } } else { /// Either the filename fragment is an absolute path OR no base directory /// was given (current working directory is assumed), ... 
const QFileInfo fileInfo(internalText); const QUrl url = QUrl::fromLocalFile(fileInfo.canonicalFilePath()); if (fileInfo.exists() && fileInfo.isFile() && url.isValid() && !result.contains(url)) { result << url; /// stop searching for URLs or filenames in current internal text continue; } } } /// extract URL from current field pos = 0; QRegularExpressionMatch urlRegExpMatch; while ((urlRegExpMatch = KBibTeX::urlRegExp.match(internalText, pos)).hasMatch()) { pos = urlRegExpMatch.capturedStart(0); const QString match = urlRegExpMatch.captured(0); QUrl url(match); if (url.isValid() && (testExistence == TestExistenceNo || !url.isLocalFile() || QFileInfo::exists(url.toLocalFile())) && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates internalText = internalText.left(pos) + internalText.mid(pos + match.length()); } /// explicitly check URL entry, may be an URL even if http:// or alike is missing pos = 0; QRegularExpressionMatch domainNameRegExpMatch; while ((domainNameRegExpMatch = KBibTeX::domainNameRegExp.match(internalText, pos)).hasMatch()) { pos = domainNameRegExpMatch.capturedStart(0); int pos2 = internalText.indexOf(QStringLiteral(" "), pos + 1); if (pos2 < 0) pos2 = internalText.length(); QString match = internalText.mid(pos, pos2 - pos); const QUrl url(QStringLiteral("http://") + match); // FIXME what about HTTPS? 
if (url.isValid() && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates internalText = internalText.left(pos) + internalText.mid(pos + match.length()); } /// extract general file-like patterns pos = 0; QRegularExpressionMatch fileRegExpMatch; while ((fileRegExpMatch = KBibTeX::fileRegExp.match(internalText, pos)).hasMatch()) { pos = fileRegExpMatch.capturedStart(0); const QString match = fileRegExpMatch.captured(0); const QFileInfo fi(match); const QUrl url = QUrl::fromLocalFile(!match.startsWith(QStringLiteral("/")) && !match.startsWith(QStringLiteral("http")) && fi.isRelative() && !baseDirectory.isEmpty() ? baseDirectory + QStringLiteral("/") + match : match); if (url.isValid() && (testExistence == TestExistenceNo || QFileInfo::exists(url.toLocalFile())) && !result.contains(url)) result << url; /// remove match from internal text to avoid duplicates internalText = internalText.left(pos) + internalText.mid(pos + match.length()); } } } QSet<QUrl> FileInfo::entryUrls(const QSharedPointer<const Entry> &entry, const QUrl &bibTeXUrl, TestExistence testExistence) { QSet<QUrl> result; if (entry.isNull() || entry->isEmpty()) return result; if (entry->contains(Entry::ftDOI)) { const QString doi = PlainTextValue::text(entry->value(Entry::ftDOI)); QRegularExpressionMatch doiRegExpMatch; if (!doi.isEmpty() && (doiRegExpMatch = KBibTeX::doiRegExp.match(doi)).hasMatch()) { QString match = doiRegExpMatch.captured(0); QUrl url(KBibTeX::doiUrlPrefix + match.remove(QStringLiteral("\\"))); result.insert(url); } } static const QString etPMID = QStringLiteral("pmid"); if (entry->contains(etPMID)) { const QString pmid = PlainTextValue::text(entry->value(etPMID)); bool ok = false; ok &= pmid.toInt(&ok) > 0; if (ok) { QUrl url(QStringLiteral("https://www.ncbi.nlm.nih.gov/pubmed/") + pmid); result.insert(url); } } static const QString etEPrint = QStringLiteral("eprint"); if (entry->contains(etEPrint)) { const QString eprint = 
PlainTextValue::text(entry->value(etEPrint)); if (!eprint.isEmpty()) { - QUrl url(QStringLiteral("http://arxiv.org/search?query=") + eprint); + QUrl url(QStringLiteral("https://arxiv.org/search?query=") + eprint); result.insert(url); } } const QString baseDirectory = bibTeXUrl.isValid() ? bibTeXUrl.adjusted(QUrl::RemoveFilename | QUrl::StripTrailingSlash).path() : QString(); for (Entry::ConstIterator it = entry->constBegin(); it != entry->constEnd(); ++it) { /// skip abstracts, they contain sometimes strange text fragments /// that are mistaken for URLs if (it.key().toLower() == Entry::ftAbstract) continue; const Value v = it.value(); for (const auto &valueItem : v) { QString plainText = PlainTextValue::text(*valueItem); static const QRegularExpression regExpEscapedChars = QRegularExpression(QStringLiteral("\\\\+([&_~])")); plainText.replace(regExpEscapedChars, QStringLiteral("\\1")); urlsInText(plainText, testExistence, baseDirectory, result); } } if (!baseDirectory.isEmpty()) { /// File types supported by "document preview" static const QStringList documentFileExtensions {QStringLiteral(".pdf"), QStringLiteral(".pdf.gz"), QStringLiteral(".pdf.bz2"), QStringLiteral(".ps"), QStringLiteral(".ps.gz"), QStringLiteral(".ps.bz2"), QStringLiteral(".eps"), QStringLiteral(".eps.gz"), QStringLiteral(".eps.bz2"), QStringLiteral(".html"), QStringLiteral(".xhtml"), QStringLiteral(".htm"), QStringLiteral(".dvi"), QStringLiteral(".djvu"), QStringLiteral(".wwf"), QStringLiteral(".jpeg"), QStringLiteral(".jpg"), QStringLiteral(".png"), QStringLiteral(".gif"), QStringLiteral(".tif"), QStringLiteral(".tiff")}; result.reserve(result.size() + documentFileExtensions.size() * 2); /// check if in the same directory as the BibTeX file /// a PDF file exists which filename is based on the entry's id for (const QString &extension : documentFileExtensions) { const QFileInfo fi(baseDirectory + QDir::separator() + entry->id() + extension); if (fi.exists()) { const QUrl url = 
QUrl::fromLocalFile(fi.canonicalFilePath()); if (!result.contains(url)) result << url; } } /// check if in the same directory as the BibTeX file there is a subdirectory /// similar to the BibTeX file's name and which contains a PDF file exists /// which filename is based on the entry's id static const QRegularExpression filenameExtension(QStringLiteral("\\.[^.]{2,5}$")); const QString basename = bibTeXUrl.fileName().remove(filenameExtension); QString directory = baseDirectory + QDir::separator() + basename; for (const QString &extension : documentFileExtensions) { const QFileInfo fi(directory + QDir::separator() + entry->id() + extension); if (fi.exists()) { const QUrl url = QUrl::fromLocalFile(fi.canonicalFilePath()); if (!result.contains(url)) result << url; } } } return result; } QString FileInfo::pdfToText(const QString &pdfFilename) { /// Build filename for text file where PDF file's plain text is cached const QString cacheDirectory = QStandardPaths::writableLocation(QStandardPaths::CacheLocation) + QStringLiteral("/pdftotext"); if (!QDir(cacheDirectory).exists() && !QDir::home().mkdir(cacheDirectory)) /// Could not create cache directory return QString(); static const QRegularExpression invalidChars(QStringLiteral("[^-a-z0-9_]"), QRegularExpression::CaseInsensitiveOption); const QString textFilename = QString(pdfFilename).remove(invalidChars).append(QStringLiteral(".txt")).prepend(QStringLiteral("/")).prepend(cacheDirectory); /// First, check if there is a cache text file if (QFileInfo::exists(textFilename)) { /// Load text from cache file QFile f(textFilename); if (f.open(QFile::ReadOnly)) { const QString text = QString::fromUtf8(f.readAll()); f.close(); return text; } } else /// No cache file exists, so run text extraction in another thread QtConcurrent::run(extractPDFTextToCache, pdfFilename, textFilename); return QString(); } void FileInfo::extractPDFTextToCache(const QString &pdfFilename, const QString &cacheFilename) { /// In case of multiple calls, 
skip text extraction if cache file already exists if (QFile(cacheFilename).exists()) return; QString text; QStringList msgList; /// Load PDF file through Poppler Poppler::Document *doc = Poppler::Document::load(pdfFilename); if (doc != nullptr) { static const int maxPages = 64; /// Build text by appending each page's text for (int i = 0; i < qMin(maxPages, doc->numPages()); ++i) text.append(doc->page(i)->text(QRect())).append(QStringLiteral("\n\n")); if (doc->numPages() > maxPages) msgList << QString(QStringLiteral("### Skipped %1 pages as PDF file contained too many pages (limit is %2 pages) ###")).arg(doc->numPages() - maxPages).arg(maxPages); delete doc; } else msgList << QStringLiteral("### Skipped as file could not be opened as PDF file ###"); /// Save text in cache file QFile f(cacheFilename); if (f.open(QFile::WriteOnly)) { static const int maxCharacters = 1 << 18; f.write(text.left(maxCharacters).toUtf8()); ///< keep only the first 2^18 many characters if (text.length() > maxCharacters) msgList << QString(QStringLiteral("### Text too long, skipping %1 characters ###")).arg(text.length() - maxCharacters); /// Write all messages (warnings) to end of text file for (const QString &msg : const_cast<const QStringList &>(msgList)) { static const char linebreak = '\n'; f.write(&linebreak, 1); f.write(msg.toUtf8()); } f.close(); } } diff --git a/src/networking/associatedfiles.h b/src/networking/associatedfiles.h index a6f89ccd..8c1e5e1b 100644 --- a/src/networking/associatedfiles.h +++ b/src/networking/associatedfiles.h @@ -1,130 +1,130 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/

#ifndef KBIBTEX_NETWORKING_ASSOCIATEDFILES_H
#define KBIBTEX_NETWORKING_ASSOCIATEDFILES_H

#include <QUrl>

#include <Entry>

#ifdef HAVE_KF5
#include "kbibtexnetworking_export.h"
#endif // HAVE_KF5

class QWidget;

class File;

/**
 * Given a remote or local filename/URL, this class will, (1) at the user's
 * discretion, move or copy this file next to the bibliography's file into
 * the same directory, (2) rename the copied file (again, at the user's
 * discretion) to either match the corresponding entry's id or follow a name
 * provided by the user and (3) modify the entry to include a reference
 * (either relative or absolute path) to the newly moved/copied file or its
 * original filename/URL.
 *
 * All members declared here are static; the class carries no state of its own.
 *
 * @author Thomas Fischer <fischer@unix-ag.uni-kl.de>
 */
class KBIBTEXNETWORKING_EXPORT AssociatedFiles
{
public:
    /// How a document reference shall be written into an entry
    enum PathType {
        ptAbsolute = 0, ///< Use absolute filenames/paths if possible
        ptRelative = 1 ///< Use relative filenames/paths if possible
    };

    /// How a copied/moved document shall be named
    enum RenameOperation {
        roKeepName = 0, ///< Do not rename a file
        roEntryId = 1, ///< Rename the file following the entry's id
        roUserDefined = 2 ///< Rename after a string provided by the user
    };

    /// What shall happen to the document itself
    enum MoveCopyOperation {
        mcoNoCopyMove = 0, ///< Do not move or copy a file, use a reference only
        mcoCopy = 1, ///< Copy the file next to the bibliography file
        mcoMove = 2 ///< Same as copy, but delete original
    };

    /**
     * Based on a given URL to an external document, compute a URL used for association
     * and insert it into the given entry, either as local file or as URL.
     *
     * @param documentUrl URL to a document like 'http://www.example.com/publication.pdf'
     * @param entry bibliography entry where the URL is to be associated with
     * @param bibTeXFile valid bibliography, preferably with property 'File::Url' set
     * @param pathType request either a relative or an absolute path
     * @return the computed URL string
     */
    static QString insertUrl(const QUrl &documentUrl, QSharedPointer<Entry> &entry, const File *bibTeXFile, PathType pathType);

    /**
     * Compute how the URL string to be associated to a bibliographic entry may look
     * like for a given document URL, a given bibliography, and whether the URL string
     * should be preferably relative or absolute.
     *
     * @param documentUrl URL to a document like 'http://www.example.com/publication.pdf'
     * @param bibTeXFile valid bibliography, preferably with property 'File::Url' set
     * @param pathType request either a relative or an absolute path
     * @return the computed URL string
     */
    static QString computeAssociateUrl(const QUrl &documentUrl, const File *bibTeXFile, PathType pathType);

    /**
     * For a given (remote) source URL and given various information such as which
     * bibliographic entry and file the local copy will be associated with, determine
     * a destination URL where the source document may be copied to.
     * This function will neither modify the bibliographic entry or file, nor do the
     * actual copying.
     *
     * @param sourceUrl The remote location of the document
     * @param entryId the identifier of the bibliography entry
     * @param bibTeXFile the bibliographic file
     * @param renameOperation what type of renaming is requested
     * @param userDefinedFilename an optional custom basename
     * @return A pair of URLs: refined source URL and computed destination URL
     */
    static QPair<QUrl, QUrl> computeSourceDestinationUrls(const QUrl &sourceUrl, const QString &entryId, const File *bibTeXFile, RenameOperation renameOperation, const QString &userDefinedFilename);

    /**
     * Copy or move a document according to the requested operations.
     * NOTE(review): this declaration was undocumented and the implementation is not
     * visible from this header; the summary is inferred from the parameter names and
     * the enums above -- confirm against the implementation.
     *
     * @param document location of the document
     * @param entryId the identifier of the bibliography entry
     * @param bibTeXFile the bibliographic file
     * @param renameOperation what type of renaming is requested
     * @param moveCopyOperation whether to copy, move, or only reference the document
     * @param widget presumably a parent widget for user interaction -- TODO confirm
     * @param userDefinedFilename an optional custom basename (defaults to an empty string)
     * @return a URL; presumably the document's resulting location -- TODO confirm
     */
    static QUrl copyDocument(const QUrl &document, const QString &entryId, const File *bibTeXFile, RenameOperation renameOperation, MoveCopyOperation moveCopyOperation, QWidget *widget, const QString &userDefinedFilename = QString());

private:
    /**
     * Translate a given URL of a document (e.g. a PDF file) to a string
     * representation pointing to the relative location of this document.
     * A "base URL", i.e. the bibliography's file location has to be provided
     * in order to calculate the relative location of the document.
     * "Upwards relativity" (i.e. paths containing "..") is not supported for this
     * function's output; in this case, an absolute path will be generated as fallback.
     *
     * @param document The document's URL
     * @param baseUrl The base URL
     * @return The document URL's string representation relative to the base URL
     */
    static QString relativeFilename(const QUrl &document, const QUrl &baseUrl);

    /**
     * Translate a given URL of a document (e.g. a PDF file) to a string
     * representation pointing to the absolute location of this document.
     * A "base URL", i.e. the bibliography's file location may be provided to
     * resolve relative document URLs.
     *
     * @param document The document's URL
     * @param baseUrl The base URL
     * @return The document URL's string representation in absolute form
     */
    static QString absoluteFilename(const QUrl &document, const QUrl &baseUrl);
};

#endif // KBIBTEX_NETWORKING_ASSOCIATEDFILES_H
diff --git a/src/networking/findpdf.h b/src/networking/findpdf.h index 815b7f9f..588bedb6 100644 --- a/src/networking/findpdf.h +++ b/src/networking/findpdf.h @@ -1,114 +1,110 @@ /*************************************************************************** * Copyright (C) 2004-2017 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #ifndef KBIBTEX_NETWORKING_FINDPDF_H #define KBIBTEX_NETWORKING_FINDPDF_H #include "kbibtexnetworking_export.h" #include <QObject> #include <QList> #include <QSet> #include <QUrl> #include <Entry> -#ifdef HAVE_KF5 -#include "kbibtexnetworking_export.h" -#endif // HAVE_KF5 - class QNetworkAccessManager; class QNetworkReply; class QTemporaryFile; /** * Search known Internet resources (search engines) for PDF files * matching a given bibliography entry.
* * @author Thomas Fischer <fischer@unix-ag.uni-kl.de> */
class KBIBTEXNETWORKING_EXPORT FindPDF : public QObject
{
    Q_OBJECT

public:
    /// Used in a later stage (user interface, @see FindPDFUI);
    /// tells the system what to do with a found PDF file.
    enum DownloadMode {
        NoDownload = 0, ///< Ignore this result item (no PDF file downloading)
        Download, ///< Download and store this PDF file in a user-specified location
        URLonly ///< Keep only the URL of the PDF; this URL will be inserted in the bib entry
    };

    /// Structure to store data about every found PDF (potential search hit)
    typedef struct {
        QUrl url; ///< Where has this PDF been found?
        QString textPreview; ///< Text extracted from the PDF file
        QTemporaryFile *tempFilename; ///< Local temporary copy
        float relevance; ///< Assessment of relevance (useful for sorting results)
        DownloadMode downloadMode; ///< User's preference what to do with this hit (default is NoDownload)
    } ResultItem;

    explicit FindPDF(QObject *parent = nullptr);
    ~FindPDF() override;

    /**
     * Initiate a search for PDF files matching a given entry.
     *
     * @param entry entry to search PDF files for
     * @return @c true if the search could be started @c false if another search is still running
     */
    bool search(const Entry &entry);

    /**
     * Once a search has completed (signal @see finished),
     * this function allows to retrieve the collected results.
     *
     * @return after a search, the list of results; before or during a search, an empty list
     */
    QList<ResultItem> results();

signals:
    /**
     * A search initiated by @see search has been finished.
     */
    void finished();

    /**
     * Some update on the ongoing search.
     * Just for eye candy, can be safely ignored if no visualization of progress is possible.
     *
     * @param visitedPages how many web pages have been visited
     * @param runningJobs how many download/search operations are running in parallel
     * @param foundDocuments how many PDF files have been found
     */
    void progress(int visitedPages, int runningJobs, int foundDocuments);

public slots:
    /**
     * Abort any running downloads.
     */
    void abort();

private slots:
    /// NOTE(review): implementation not visible in this header; presumably invoked
    /// when one of the search's network requests has finished -- confirm.
    void downloadFinished();

private:
    /// Private implementation class, defined outside this header
    class Private;
    Private *const d;
};

#endif // KBIBTEX_NETWORKING_FINDPDF_H
diff --git a/src/networking/onlinesearch/onlinesearchcernds.cpp b/src/networking/onlinesearch/onlinesearchcernds.cpp index 615276d4..3aa48c61 100644 --- a/src/networking/onlinesearch/onlinesearchcernds.cpp +++ b/src/networking/onlinesearch/onlinesearchcernds.cpp @@ -1,113 +1,113 @@ /**************************************************************************** * Copyright (C) 2004-2018 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * Copyright (C) 2013 Yngve I. Levinsen <yngve.inntjore.levinsen@cern.ch> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>.
* ****************************************************************************/ #include "onlinesearchcernds.h" #include <QUrlQuery> #include <KLocalizedString> #include "logging_networking.h" OnlineSearchCERNDS::OnlineSearchCERNDS(QObject *parent) : OnlineSearchSimpleBibTeXDownload(parent) { /// nothing } QString OnlineSearchCERNDS::label() const { return i18n("CERN Document Server"); } QUrl OnlineSearchCERNDS::homepage() const { - return QUrl(QStringLiteral("http://cds.cern.ch/")); + return QUrl(QStringLiteral("https://cds.cern.ch/")); } QString OnlineSearchCERNDS::favIconUrl() const { - return QStringLiteral("http://cds.cern.ch/favicon.ico"); + return QStringLiteral("https://cds.cern.ch/favicon.ico"); } QUrl OnlineSearchCERNDS::buildQueryUrl(const QMap<QString, QString> &query, int numResults) { /// Example for a search URL: - /// http://cds.cern.ch/search?action_search=Search&sf=&so=d&rm=&sc=0&of=hx&f=&rg=10&ln=en&as=1&m1=a&p1=stone&f1=title&op1=a&m2=a&p2=smith&f2=author&op2=a&m3=a&p3=&f3= + /// https://cds.cern.ch/search?action_search=Search&sf=&so=d&rm=&sc=0&of=hx&f=&rg=10&ln=en&as=1&m1=a&p1=stone&f1=title&op1=a&m2=a&p2=smith&f2=author&op2=a&m3=a&p3=&f3= /// of=hx asks for BibTeX results /// rg=10 asks for 10 results /// c=CERN+Document+Server or c=Articles+%26+Preprints to limit scope /// Search search argument (X={1,2,3,...}): /// pX search text /// mX a=all words; o=any; e=exact phrase; p=partial phrase; r=regular expression /// opX a=AND; o=OR; n=AND NOT /// fX ""=any field; title; author; reportnumber; year; fulltext /// Build URL - QUrl url = QUrl(QStringLiteral("http://cds.cern.ch/search?ln=en&action_search=Search&c=Articles+%26+Preprints&as=1&sf=&so=d&rm=&sc=0&of=hx&f=")); + QUrl url = QUrl(QStringLiteral("https://cds.cern.ch/search?ln=en&action_search=Search&c=Articles+%26+Preprints&as=1&sf=&so=d&rm=&sc=0&of=hx&f=")); QUrlQuery q(url); /// Set number of expected results q.addQueryItem(QStringLiteral("rg"), QString::number(numResults)); /// Number 
search arguments int argumentCount = 0; /// add words from "free text" field const QStringList freeTextWords = splitRespectingQuotationMarks(query[queryKeyFreeText]); for (const QString &word : freeTextWords) { ++argumentCount; q.addQueryItem(QString(QStringLiteral("p%1")).arg(argumentCount), word); q.addQueryItem(QString(QStringLiteral("m%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("op%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("f%1")).arg(argumentCount), QString()); } /// add words from "author" field const QStringList authorWords = splitRespectingQuotationMarks(query[queryKeyAuthor]); for (const QString &word : authorWords) { ++argumentCount; q.addQueryItem(QString(QStringLiteral("p%1")).arg(argumentCount), word); q.addQueryItem(QString(QStringLiteral("m%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("op%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("f%1")).arg(argumentCount), QStringLiteral("author")); } /// add words from "title" field const QStringList titleWords = splitRespectingQuotationMarks(query[queryKeyTitle]); for (const QString &word : titleWords) { ++argumentCount; q.addQueryItem(QString(QStringLiteral("p%1")).arg(argumentCount), word); q.addQueryItem(QString(QStringLiteral("m%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("op%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("f%1")).arg(argumentCount), QStringLiteral("title")); } /// add words from "title" field const QString year = query[queryKeyYear]; if (!year.isEmpty()) { ++argumentCount; q.addQueryItem(QString(QStringLiteral("p%1")).arg(argumentCount), year); q.addQueryItem(QString(QStringLiteral("m%1")).arg(argumentCount), QStringLiteral("a")); q.addQueryItem(QString(QStringLiteral("op%1")).arg(argumentCount), QStringLiteral("a")); 
q.addQueryItem(QString(QStringLiteral("f%1")).arg(argumentCount), QStringLiteral("year")); } url.setQuery(q); return url; } diff --git a/src/networking/onlinesearch/onlinesearchgooglescholar.cpp b/src/networking/onlinesearch/onlinesearchgooglescholar.cpp index 1615c3e5..51d76607 100644 --- a/src/networking/onlinesearch/onlinesearchgooglescholar.cpp +++ b/src/networking/onlinesearch/onlinesearchgooglescholar.cpp @@ -1,474 +1,474 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. 
* ***************************************************************************/ #include "onlinesearchgooglescholar.h" #include <QNetworkReply> #include <QIcon> #include <QUrlQuery> #include <QRegularExpression> #include <QTimer> #ifdef HAVE_KF5 #include <KLocalizedString> #else // HAVE_KF5 #define i18n(text) QObject::tr(text) #endif // HAVE_KF5 #include <FileImporterBibTeX> #include "internalnetworkaccessmanager.h" #include "logging_networking.h" class OnlineSearchGoogleScholar::OnlineSearchGoogleScholarPrivate { public: int numResults; QMap<QString, QPair<QString, QString>> listBibTeXurls; QString queryFreetext, queryAuthor, queryYear; QString startPageUrl; QString advancedSearchPageUrl; QString queryPageUrl; FileImporterBibTeX *importer; OnlineSearchGoogleScholarPrivate(OnlineSearchGoogleScholar *parent) : numResults(0) { importer = new FileImporterBibTeX(parent); - startPageUrl = QStringLiteral("http://scholar.google.com/"); - queryPageUrl = QStringLiteral("http://%1/scholar"); + startPageUrl = QStringLiteral("https://scholar.google.com/"); + queryPageUrl = QStringLiteral("https://%1/scholar"); } ~OnlineSearchGoogleScholarPrivate() { delete importer; } QString documentUrlForBibTeXEntry(const QString &htmlText, int bibLinkPos) { /// Regular expression to detect text of a link to a document static const QRegularExpression documentLinkIndicator(QStringLiteral("\\[(PDF|HTML)\\]"), QRegularExpression::CaseInsensitiveOption); /// Text for link is *before* the BibTeX link in Google's HTML code int posDocumentLinkText = htmlText.lastIndexOf(documentLinkIndicator, bibLinkPos); /// Check position of previous BibTeX link to not extract the wrong document link int posPreviousBib = htmlText.lastIndexOf(QStringLiteral("/scholar.bib"), bibLinkPos - 3); if (posPreviousBib < 0) posPreviousBib = 0; /// no previous BibTeX entry? /// If all found position values look reasonable ... 
if (posDocumentLinkText > posPreviousBib) { /// There is a [PDF] or [HTML] link for this BibTeX entry, so find URL /// Variables p1 and p2 are used to close in to the document's URL int p1 = htmlText.lastIndexOf(QStringLiteral("<a "), posDocumentLinkText); if (p1 > 0) { p1 = htmlText.indexOf(QStringLiteral("href=\""), p1); if (p1 > 0) { int p2 = htmlText.indexOf(QLatin1Char('"'), p1 + 7); if (p2 > 0) return htmlText.mid(p1 + 6, p2 - p1 - 6).replace(QStringLiteral("&amp;"), QStringLiteral("&")); } } } return QString(); } QString mainUrlForBibTeXEntry(const QString &htmlText, int bibLinkPos) { /// Text for link is *before* the BibTeX link in Google's HTML code int posH3 = htmlText.lastIndexOf(QStringLiteral("<h3 "), bibLinkPos); /// Check position of previous BibTeX link to not extract the wrong document link int posPreviousBib = htmlText.lastIndexOf(QStringLiteral("/scholar.bib"), bibLinkPos - 3); if (posPreviousBib < 0) posPreviousBib = 0; /// no previous BibTeX entry? /// If all found position values look reasonable ... 
if (posH3 > posPreviousBib) { /// There is a h3 tag for this BibTeX entry, so find URL /// Variables p1 and p2 are used to close in to the document's URL int p1 = htmlText.indexOf(QStringLiteral("href=\""), posH3); if (p1 > 0) { int p2 = htmlText.indexOf(QLatin1Char('"'), p1 + 7); if (p2 > 0) return htmlText.mid(p1 + 6, p2 - p1 - 6).replace(QStringLiteral("&amp;"), QStringLiteral("&")); } } return QString(); } }; OnlineSearchGoogleScholar::OnlineSearchGoogleScholar(QObject *parent) : OnlineSearchAbstract(parent), d(new OnlineSearchGoogleScholar::OnlineSearchGoogleScholarPrivate(this)) { /// nothing } OnlineSearchGoogleScholar::~OnlineSearchGoogleScholar() { delete d; } void OnlineSearchGoogleScholar::startSearch(const QMap<QString, QString> &query, int numResults) { d->numResults = numResults; m_hasBeenCanceled = false; emit progress(curStep = 0, numSteps = numResults + 4); const auto respectingQuotationMarksFreeText = splitRespectingQuotationMarks(query[queryKeyFreeText]); const auto respectingQuotationMarksTitle = splitRespectingQuotationMarks(query[queryKeyTitle]); QStringList queryFragments; queryFragments.reserve(respectingQuotationMarksFreeText.size() + respectingQuotationMarksTitle.size()); for (const QString &queryFragment : respectingQuotationMarksFreeText) { queryFragments.append(encodeURL(queryFragment)); } for (const QString &queryFragment : respectingQuotationMarksTitle) { queryFragments.append(encodeURL(queryFragment)); } d->queryFreetext = queryFragments.join(QStringLiteral("+")); const auto respectingQuotationMarksAuthor = splitRespectingQuotationMarks(query[queryKeyAuthor]); queryFragments.clear(); queryFragments.reserve(respectingQuotationMarksAuthor.size()); for (const QString &queryFragment : respectingQuotationMarksAuthor) { queryFragments.append(encodeURL(queryFragment)); } d->queryAuthor = queryFragments.join(QStringLiteral("+")); d->queryYear = encodeURL(query[queryKeyYear]); QUrl url(d->startPageUrl); QNetworkRequest request(url); 
QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingStartPage); refreshBusyProperty(); } void OnlineSearchGoogleScholar::doneFetchingStartPage() { emit progress(++curStep, numSteps); QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); QUrl newDomainUrl; if (handleErrors(reply, newDomainUrl)) { if (newDomainUrl.isValid() && newDomainUrl != reply->url()) { /// following redirection to country-specific domain ++numSteps; QNetworkRequest request(newDomainUrl); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingStartPage); } else { /// landed on country-specific domain static const QRegularExpression pathToSettingsPage(QStringLiteral(" href=\"(/scholar_settings[^ \"]*)")); const QString htmlCode = QString::fromUtf8(reply->readAll()); // dumpToFile(QStringLiteral("01-doneFetchingStartPage.html"),htmlCode); const QRegularExpressionMatch pathToSettingsPageMatch = pathToSettingsPage.match(htmlCode); if (!pathToSettingsPageMatch.hasMatch() || pathToSettingsPageMatch.captured(1).isEmpty()) { qCWarning(LOG_KBIBTEX_NETWORKING) << "No link to Google Scholar settings found"; stopSearch(resultNoError); return; } QUrl url = reply->url().resolved(QUrl(decodeURL(pathToSettingsPageMatch.captured(1)))); QUrlQuery query(url); query.removeQueryItem(QStringLiteral("hl")); query.addQueryItem(QStringLiteral("hl"), QStringLiteral("en")); query.removeQueryItem(QStringLiteral("as_sdt")); query.addQueryItem(QStringLiteral("as_sdt"), QStringLiteral("0,5")); url.setQuery(query); const QUrl replyUrl = reply->url(); QTimer::singleShot(250, this, [this, url, replyUrl]() { QNetworkRequest request(url); QNetworkReply *newReply 
= InternalNetworkAccessManager::instance().get(request, replyUrl); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingConfigPage); }); } } refreshBusyProperty(); } void OnlineSearchGoogleScholar::doneFetchingConfigPage() { emit progress(++curStep, numSteps); QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); QUrl redirUrl; if (handleErrors(reply, redirUrl)) { if (redirUrl.isValid()) { /// Redirection to another url ++numSteps; QNetworkRequest request(redirUrl); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingConfigPage); } else { const QString htmlText = QString::fromUtf8(reply->readAll().constData()); // dumpToFile(QStringLiteral("02-doneFetchingConfigPage.html"), htmlText); static const QRegularExpression formOpeningTag(QStringLiteral("<form [^>]+action=\"([^\"]*scholar_setprefs[^\"]*)")); const QRegularExpressionMatch formOpeningTagMatch = formOpeningTag.match(htmlText); const int formOpeningTagPos = formOpeningTagMatch.capturedStart(0); if (formOpeningTagPos < 0) { qCWarning(LOG_KBIBTEX_NETWORKING) << "Could not find opening tag for form:" << formOpeningTag.pattern(); stopSearch(resultNoError); return; } QMap<QString, QString> inputMap = formParameters(htmlText, formOpeningTagPos); inputMap[QStringLiteral("hl")] = QStringLiteral("en"); inputMap[QStringLiteral("scis")] = QStringLiteral("yes"); inputMap[QStringLiteral("scisf")] = QStringLiteral("4"); inputMap[QStringLiteral("num")] = QString::number(d->numResults); inputMap[QStringLiteral("submit")] = QString(); QUrl url = reply->url().resolved(QUrl(decodeURL(formOpeningTagMatch.captured(1)))); QUrlQuery query(url); for (QMap<QString, QString>::ConstIterator it = inputMap.constBegin(); it 
!= inputMap.constEnd(); ++it) { query.removeQueryItem(it.key()); query.addQueryItem(it.key(), it.value()); } url.setQuery(query); QTimer::singleShot(250, this, [this, url, reply]() { QNetworkRequest request(url); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingSetConfigPage); }); } } refreshBusyProperty(); } void OnlineSearchGoogleScholar::doneFetchingSetConfigPage() { emit progress(++curStep, numSteps); QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); QUrl redirUrl; if (handleErrors(reply, redirUrl)) { if (redirUrl.isValid()) { /// Redirection to another url ++numSteps; QNetworkRequest request(redirUrl); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingSetConfigPage); } else { // const QString htmlText = QString::fromUtf8(reply->readAll().constData()); // dumpToFile(QStringLiteral("03-doneFetchingSetConfigPage.html"), htmlText); QUrl url(QString(d->queryPageUrl).arg(reply->url().host())); QUrlQuery query(url); query.addQueryItem(QStringLiteral("as_q"), d->queryFreetext); query.addQueryItem(QStringLiteral("as_sauthors"), d->queryAuthor); query.addQueryItem(QStringLiteral("as_ylo"), d->queryYear); query.addQueryItem(QStringLiteral("as_yhi"), d->queryYear); query.addQueryItem(QStringLiteral("as_vis"), QStringLiteral("1")); ///< include citations query.addQueryItem(QStringLiteral("num"), QString::number(d->numResults)); query.addQueryItem(QStringLiteral("btnG"), QStringLiteral("Search Scholar")); url.setQuery(query); QTimer::singleShot(250, this, [this, url, reply]() { QNetworkRequest request(url); QNetworkReply *newReply = 
InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingQueryPage); }); } } refreshBusyProperty(); } void OnlineSearchGoogleScholar::doneFetchingQueryPage() { emit progress(++curStep, numSteps); QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); QUrl redirUrl; if (handleErrors(reply, redirUrl)) { if (redirUrl.isValid()) { /// Redirection to another url ++numSteps; QNetworkRequest request(redirUrl); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingQueryPage); } else { const QString htmlText = QString::fromUtf8(reply->readAll().constData()); // dumpToFile(QStringLiteral("04-doneFetchingQueryPage.html"), htmlText); d->listBibTeXurls.clear(); #ifdef HAVE_KF5 if (htmlText.contains(QStringLiteral("enable JavaScript")) || htmlText.contains(QStringLiteral("re not a robot"))) { qCInfo(LOG_KBIBTEX_NETWORKING) << "'Google Scholar' denied scrapping data because it thinks KBibTeX is a robot."; sendVisualNotification(i18n("'Google Scholar' denied scrapping data because it thinks you are a robot."), label(), QStringLiteral("kbibtex"), 7 * 1000); } else { #endif // HAVE_KF5 static const QRegularExpression linkToBib("/scholar.bib\\?[^\" >]+"); QRegularExpressionMatchIterator linkToBibMatchIterator = linkToBib.globalMatch(htmlText); while (linkToBibMatchIterator.hasNext()) { const QRegularExpressionMatch linkToBibMatch = linkToBibMatchIterator.next(); const int pos = linkToBibMatch.capturedStart(); /// Try to figure out [PDF] or [HTML] link associated with BibTeX entry const QString documentUrl = d->documentUrlForBibTeXEntry(htmlText, pos); /// Extract primary link associated with BibTeX entry const 
QString primaryUrl = d->mainUrlForBibTeXEntry(htmlText, pos); const QString bibtexUrl(QStringLiteral("https://") + reply->url().host() + linkToBibMatch.captured().replace(QStringLiteral("&amp;"), QStringLiteral("&"))); d->listBibTeXurls.insert(bibtexUrl, qMakePair(primaryUrl, documentUrl)); } #ifdef HAVE_KF5 } #endif // HAVE_KF5 if (!d->listBibTeXurls.isEmpty()) { const auto listBibTeXurlsFront = d->listBibTeXurls.begin(); const QString bibtexUrl = listBibTeXurlsFront.key(); const QString primaryUrl = listBibTeXurlsFront.value().first; const QString documentUrl = listBibTeXurlsFront.value().second; QTimer::singleShot(250, this, [this, bibtexUrl, primaryUrl, documentUrl, reply]() { QNetworkRequest request(bibtexUrl); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); if (!primaryUrl.isEmpty()) { /// Store primary URL as a property of the request/reply newReply->setProperty("primaryurl", QVariant::fromValue<QString>(primaryUrl)); } if (!documentUrl.isEmpty()) { /// Store URL to document as a property of the request/reply newReply->setProperty("documenturl", QVariant::fromValue<QString>(documentUrl)); } InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingBibTeX); }); d->listBibTeXurls.erase(listBibTeXurlsFront); } else stopSearch(resultNoError); } } refreshBusyProperty(); } void OnlineSearchGoogleScholar::doneFetchingBibTeX() { emit progress(++curStep, numSteps); QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); /// Extract previously stored URLs from reply const QString primaryUrl = reply->property("primaryurl").toString(); const QString documentUrl = reply->property("documenturl").toString(); QUrl newDomainUrl; if (handleErrors(reply, newDomainUrl)) { if (newDomainUrl.isValid() && newDomainUrl != reply->url()) { /// following redirection to country-specific domain ++numSteps; QNetworkRequest 
request(newDomainUrl); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingBibTeX); } else { /// ensure proper treatment of UTF-8 characters const QString rawText = QString::fromUtf8(reply->readAll()); // dumpToFile(QStringLiteral("05-doneFetchingBibTeX.bib"),rawText); File *bibtexFile = d->importer->fromString(rawText); bool hasEntry = false; if (bibtexFile != nullptr) { for (const auto &element : const_cast<const File &>(*bibtexFile)) { QSharedPointer<Entry> entry = element.dynamicCast<Entry>(); if (!entry.isNull()) { Value v; v.append(QSharedPointer<VerbatimText>(new VerbatimText(label()))); entry->insert(QStringLiteral("x-fetchedfrom"), v); if (!primaryUrl.isEmpty()) { /// There is an external document associated with this BibTeX entry Value urlValue = entry->value(Entry::ftUrl); urlValue.append(QSharedPointer<VerbatimText>(new VerbatimText(primaryUrl))); entry->insert(Entry::ftUrl, urlValue); } if (!documentUrl.isEmpty() && primaryUrl != documentUrl /** avoid duplicates */) { /// There is a web page associated with this BibTeX entry Value urlValue = entry->value(Entry::ftUrl); urlValue.append(QSharedPointer<VerbatimText>(new VerbatimText(documentUrl))); entry->insert(Entry::ftUrl, urlValue); } emit foundEntry(entry); hasEntry = true; } } delete bibtexFile; } if (!hasEntry) { qCWarning(LOG_KBIBTEX_NETWORKING) << "Searching" << label() << "resulted in invalid BibTeX data:" << rawText; stopSearch(resultUnspecifiedError); } else if (!d->listBibTeXurls.isEmpty()) { const auto listBibTeXurlsFront = d->listBibTeXurls.begin(); const QString bibtexUrl = listBibTeXurlsFront.key(); const QString primaryUrl = listBibTeXurlsFront.value().first; const QString documentUrl = listBibTeXurlsFront.value().second; QNetworkRequest request(bibtexUrl); QNetworkReply *newReply = 
InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); if (!primaryUrl.isEmpty()) { /// Store primary URL as a property of the request/reply newReply->setProperty("primaryurl", QVariant::fromValue<QString>(primaryUrl)); } if (!documentUrl.isEmpty()) { /// Store URL to document as a property of the request/reply newReply->setProperty("documenturl", QVariant::fromValue<QString>(documentUrl)); } connect(newReply, &QNetworkReply::finished, this, &OnlineSearchGoogleScholar::doneFetchingBibTeX); d->listBibTeXurls.erase(listBibTeXurlsFront); } else stopSearch(resultNoError); } } refreshBusyProperty(); } QString OnlineSearchGoogleScholar::label() const { #ifdef HAVE_KF5 return i18n("Google Scholar"); #else // HAVE_KF5 //= onlinesearch-googlescholar-label return QObject::tr("Google Scholar"); #endif // HAVE_KF5 } QString OnlineSearchGoogleScholar::favIconUrl() const { return QStringLiteral("https://scholar.google.com/favicon-png.ico"); } QUrl OnlineSearchGoogleScholar::homepage() const { return QUrl(QStringLiteral("https://scholar.google.com/")); } diff --git a/src/networking/onlinesearch/onlinesearchieeexplore.cpp b/src/networking/onlinesearch/onlinesearchieeexplore.cpp index 6130b05f..083d85cb 100644 --- a/src/networking/onlinesearch/onlinesearchieeexplore.cpp +++ b/src/networking/onlinesearch/onlinesearchieeexplore.cpp @@ -1,199 +1,199 @@ /*************************************************************************** * Copyright (C) 2004-2018 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/

#include "onlinesearchieeexplore.h"

#include <QNetworkReply>
#include <QUrl>
#include <QUrlQuery>

#ifdef HAVE_KF5
#include <KLocalizedString>
#else // HAVE_KF5
#define i18n(text) QObject::tr(text)
#endif // HAVE_KF5

#include <XSLTransform>
#include <EncoderXML>
#include <FileImporterBibTeX>
#include "internalnetworkaccessmanager.h"
#include "logging_networking.h"

/**
 * Private data of the IEEE Xplore search: the API's base URL, the XSL
 * transformation applied to the API's XML output, and the code to
 * assemble query URLs.
 */
class OnlineSearchIEEEXplore::OnlineSearchIEEEXplorePrivate
{
private:
    /// Filename of the XSL file, to be located via XSLTransform::locateXSLTfile
    static const QString xsltFilenameBase;

public:
    /// Base URL of the search API, already containing format and API key
    static const QUrl apiUrl;
    const XSLTransform xslt;

    OnlineSearchIEEEXplorePrivate(OnlineSearchIEEEXplore *)
            : xslt(XSLTransform::locateXSLTfile(xsltFilenameBase))
    {
        /// Only a warning gets logged here; the object is created regardless
        if (!xslt.isValid())
            qCWarning(LOG_KBIBTEX_NETWORKING) << "Failed to initialize XSL transformation based on file '" << xsltFilenameBase << "'";
    }

    /**
     * Assemble the URL for an API request based on the API's base URL.
     * Words of each field get wrapped in quotation marks and are joined
     * by a plus sign; empty fields are omitted.
     *
     * @param query search terms, accessed by the keys queryKeyFreeText, queryKeyTitle, queryKeyAuthor, and queryKeyYear
     * @param numResults maximum number of records to ask for
     * @return the complete query URL
     */
    QUrl buildQueryUrl(const QMap<QString, QString> &query, int numResults) {
        QUrl queryUrl = apiUrl;
        QUrlQuery q(queryUrl.query());

        /// Free text
        const QStringList freeTextFragments = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyFreeText]);
        if (!freeTextFragments.isEmpty())
            q.addQueryItem(QStringLiteral("querytext"), QStringLiteral("\"") + freeTextFragments.join(QStringLiteral("\"+\"")) + QStringLiteral("\""));

        /// Title
        const QStringList title = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyTitle]);
        if (!title.isEmpty())
            q.addQueryItem(QStringLiteral("article_title"), QStringLiteral("\"") + title.join(QStringLiteral("\"+\"")) + QStringLiteral("\""));

        /// Author
        const QStringList authors = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyAuthor]);
        if (!authors.isEmpty())
            q.addQueryItem(QStringLiteral("author"), QStringLiteral("\"") + authors.join(QStringLiteral("\"+\"")) + QStringLiteral("\""));

        /// Year (the same value is used as both start and end year)
        if (!query[queryKeyYear].isEmpty()) {
            q.addQueryItem(QStringLiteral("start_year"), query[queryKeyYear]);
            q.addQueryItem(QStringLiteral("end_year"), query[queryKeyYear]);
        }

        /// Sort order of results: newest publications first
        q.addQueryItem(QStringLiteral("sort_field"), QStringLiteral("publication_year"));
        q.addQueryItem(QStringLiteral("sort_order"), QStringLiteral("desc"));
        /// Request numResults many entries
        q.addQueryItem(QStringLiteral("start_record"), QStringLiteral("1"));
        q.addQueryItem(QStringLiteral("max_records"), QString::number(numResults));

        queryUrl.setQuery(q);

        return queryUrl;
    }
};

const QString OnlineSearchIEEEXplore::OnlineSearchIEEEXplorePrivate::xsltFilenameBase = QStringLiteral("ieeexploreapiv1-to-bibtex.xsl");
/// The API key is stored in obfuscated form and gets decoded via
/// InternalNetworkAccessManager::reverseObfuscate
const QUrl OnlineSearchIEEEXplore::OnlineSearchIEEEXplorePrivate::apiUrl(QStringLiteral("https://ieeexploreapi.ieee.org/api/v1/search/articles?format=xml&apikey=") + InternalNetworkAccessManager::reverseObfuscate("\x15\x65\x4b\x2a\x37\x5f\x78\x12\x44\x70\xf8\x8e\x85\xe0\xdb\xae\xb\x7a\x7e\x46\xab\x93\xbc\xc8\xdb\xa8\xa5\xd2\xee\x96\x7e\x7\x37\x54\xa3\xd4\x2b\x5e\x81\xe6\x6f\x17\xb3\xd6\x7b\x1f\x1a\x60"));

OnlineSearchIEEEXplore::OnlineSearchIEEEXplore(QObject *parent)
        : OnlineSearchAbstract(parent), d(new OnlineSearchIEEEXplore::OnlineSearchIEEEXplorePrivate(this))
{
    /// nothing
}

OnlineSearchIEEEXplore::~OnlineSearchIEEEXplore()
{
    delete d;
}

/**
 * Start a new search by sending a single request to the API; the reply
 * gets processed in doneFetchingXML().
 *
 * @param query search terms as expected by OnlineSearchIEEEXplorePrivate::buildQueryUrl
 * @param numResults maximum number of records to ask for
 */
void OnlineSearchIEEEXplore::startSearch(const QMap<QString, QString> &query, int numResults)
{
    m_hasBeenCanceled = false;
    emit progress(curStep = 0, numSteps = 1);

    QNetworkRequest request(d->buildQueryUrl(query, numResults));

    // FIXME 'ieeexploreapi.ieee.org' uses a SSL/TLS certificate only valid for 'mashery.com'
    // TODO re-enable certificate validation once problem has been fixed (already reported)
    // NOTE(review): VerifyNone disables peer certificate verification for this
    // request, which carries the API key -- re-check whether this workaround
    // is still necessary
    QSslConfiguration requestSslConfig = request.sslConfiguration();
    requestSslConfig.setPeerVerifyMode(QSslSocket::VerifyNone);
    request.setSslConfiguration(requestSslConfig);

    QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
    InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply);
    connect(reply, &QNetworkReply::finished, this, &OnlineSearchIEEEXplore::doneFetchingXML);

    refreshBusyProperty();
}

void OnlineSearchIEEEXplore::doneFetchingXML()
{
    emit progress(++curStep, numSteps);

    QNetworkReply *reply = static_cast<QNetworkReply *>(sender());

    QUrl redirUrl;
    if (handleErrors(reply, redirUrl)) {
        if (redirUrl.isValid()) {
            /// redirection to another url
            ++numSteps;

            QNetworkRequest request(redirUrl);

            // FIXME 'ieeexploreapi.ieee.org' uses a SSL/TLS certificate only valid for 'mashery.com'
            // TODO re-enable certificate validation once problem has been fix (already reported)
            QSslConfiguration requestSslConfig = request.sslConfiguration();
            requestSslConfig.setPeerVerifyMode(QSslSocket::VerifyNone);
            request.setSslConfiguration(requestSslConfig);

            QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request);
            InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply);
            connect(reply, &QNetworkReply::finished, this, &OnlineSearchIEEEXplore::doneFetchingXML);
        } else {
            /// ensure proper treatment of UTF-8 characters
            const QString xmlCode = QString::fromUtf8(reply->readAll().constData());

            /// use XSL transformation to get BibTeX document from XML result
            const QString bibTeXcode = EncoderXML::instance().decode(d->xslt.transform(xmlCode));
            if (bibTeXcode.isEmpty()) {
                qCWarning(LOG_KBIBTEX_NETWORKING) << "XSL tranformation failed for data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString();
                stopSearch(resultInvalidArguments);
            } else {
                FileImporterBibTeX importer(this);
                File *bibtexFile = importer.fromString(bibTeXcode);

                bool
hasEntries = false; if (bibtexFile != nullptr) { for (const auto &element : const_cast<const File &>(*bibtexFile)) { QSharedPointer<Entry> entry = element.dynamicCast<Entry>(); hasEntries |= publishEntry(entry); } stopSearch(resultNoError); delete bibtexFile; } else { qCWarning(LOG_KBIBTEX_NETWORKING) << "No valid BibTeX file results returned on request on" << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); stopSearch(resultUnspecifiedError); } } } } refreshBusyProperty(); } QString OnlineSearchIEEEXplore::label() const { #ifdef HAVE_KF5 return i18n("IEEEXplore"); #else // HAVE_KF5 //= onlinesearch-ieeexplore-label return QObject::tr("IEEEXplore"); #endif // HAVE_KF5 } QString OnlineSearchIEEEXplore::favIconUrl() const { - return QStringLiteral("http://ieeexplore.ieee.org/favicon.ico"); + return QStringLiteral("https://ieeexplore.ieee.org/favicon.ico"); } QUrl OnlineSearchIEEEXplore::homepage() const { return QUrl(QStringLiteral("https://ieeexplore.ieee.org/")); } diff --git a/src/networking/onlinesearch/onlinesearchingentaconnect.cpp b/src/networking/onlinesearch/onlinesearchingentaconnect.cpp index c0bb57e5..76d2c643 100644 --- a/src/networking/onlinesearch/onlinesearchingentaconnect.cpp +++ b/src/networking/onlinesearch/onlinesearchingentaconnect.cpp @@ -1,425 +1,425 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "onlinesearchingentaconnect.h" #include <QBuffer> #ifdef HAVE_QTWIDGETS #include <QLabel> #include <QFormLayout> #include <QSpinBox> #include <QLineEdit> #include <QIcon> #endif // HAVE_QTWIDGETS #include <QNetworkReply> #include <QUrlQuery> #ifdef HAVE_KF5 #include <KLocalizedString> #include <KConfigGroup> #else // HAVE_KF5 #define i18n(text) QObject::tr(text) #endif // HAVE_KF5 #include <File> #include <Entry> #include <FileImporterBibTeX> #include "internalnetworkaccessmanager.h" #include "onlinesearchabstract_p.h" #include "logging_networking.h" #ifdef HAVE_QTWIDGETS class OnlineSearchIngentaConnect::Form : public OnlineSearchAbstract::Form { Q_OBJECT private: QString configGroupName; void loadState() { KConfigGroup configGroup(d->config, configGroupName); lineEditFullText->setText(configGroup.readEntry(QStringLiteral("fullText"), QString())); lineEditTitle->setText(configGroup.readEntry(QStringLiteral("title"), QString())); lineEditAuthor->setText(configGroup.readEntry(QStringLiteral("author"), QString())); lineEditAbstractKeywords->setText(configGroup.readEntry(QStringLiteral("abstractKeywords"), QString())); lineEditPublication->setText(configGroup.readEntry(QStringLiteral("publication"), QString())); lineEditISSNDOIISBN->setText(configGroup.readEntry(QStringLiteral("ISSNDOIISBN"), QString())); lineEditVolume->setText(configGroup.readEntry(QStringLiteral("volume"), QString())); lineEditIssue->setText(configGroup.readEntry(QStringLiteral("issue"), QString())); numResultsField->setValue(configGroup.readEntry(QStringLiteral("numResults"), 10)); } public: QLineEdit *lineEditFullText; QLineEdit *lineEditTitle; QLineEdit *lineEditAuthor; QLineEdit *lineEditAbstractKeywords; QLineEdit *lineEditPublication; QLineEdit 
*lineEditISSNDOIISBN; QLineEdit *lineEditVolume; QLineEdit *lineEditIssue; QSpinBox *numResultsField; Form(QWidget *widget) : OnlineSearchAbstract::Form(widget), configGroupName(QStringLiteral("Search Engine IngentaConnect")) { QFormLayout *layout = new QFormLayout(this); layout->setMargin(0); lineEditFullText = new QLineEdit(this); lineEditFullText->setClearButtonEnabled(true); layout->addRow(i18n("Full text:"), lineEditFullText); connect(lineEditFullText, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditTitle = new QLineEdit(this); lineEditTitle->setClearButtonEnabled(true); layout->addRow(i18n("Title:"), lineEditTitle); connect(lineEditTitle, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditAuthor = new QLineEdit(this); lineEditAuthor->setClearButtonEnabled(true); layout->addRow(i18n("Author:"), lineEditAuthor); connect(lineEditAuthor, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditAbstractKeywords = new QLineEdit(this); lineEditAbstractKeywords->setClearButtonEnabled(true); layout->addRow(i18n("Abstract/Keywords:"), lineEditAbstractKeywords); connect(lineEditAbstractKeywords, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditPublication = new QLineEdit(this); lineEditPublication->setClearButtonEnabled(true); layout->addRow(i18n("Publication:"), lineEditPublication); connect(lineEditPublication, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditISSNDOIISBN = new QLineEdit(this); lineEditISSNDOIISBN->setClearButtonEnabled(true); layout->addRow(i18n("ISSN/ISBN/DOI:"), lineEditISSNDOIISBN); connect(lineEditISSNDOIISBN, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditVolume = new QLineEdit(this); lineEditVolume->setClearButtonEnabled(true); layout->addRow(i18n("Volume:"), lineEditVolume); connect(lineEditVolume, 
&QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); lineEditIssue = new QLineEdit(this); lineEditIssue->setClearButtonEnabled(true); layout->addRow(i18n("Issue/Number:"), lineEditIssue); connect(lineEditIssue, &QLineEdit::returnPressed, this, &OnlineSearchIngentaConnect::Form::returnPressed); numResultsField = new QSpinBox(this); layout->addRow(i18n("Number of Results:"), numResultsField); numResultsField->setMinimum(3); numResultsField->setMaximum(100); numResultsField->setValue(10); } bool readyToStart() const override { return !(lineEditFullText->text().isEmpty() && lineEditTitle->text().isEmpty() && lineEditAuthor->text().isEmpty() && lineEditAbstractKeywords->text().isEmpty() && lineEditPublication->text().isEmpty() && lineEditISSNDOIISBN->text().isEmpty() && lineEditVolume->text().isEmpty() && lineEditIssue->text().isEmpty()); } void copyFromEntry(const Entry &entry) override { lineEditTitle->setText(PlainTextValue::text(entry[Entry::ftTitle])); lineEditAuthor->setText(d->authorLastNames(entry).join(QStringLiteral(" "))); lineEditVolume->setText(PlainTextValue::text(entry[Entry::ftVolume])); lineEditIssue->setText(PlainTextValue::text(entry[Entry::ftNumber])); QString issnDoiIsbn = PlainTextValue::text(entry[Entry::ftDOI]); if (issnDoiIsbn.isEmpty()) issnDoiIsbn = PlainTextValue::text(entry[Entry::ftISBN]); if (issnDoiIsbn.isEmpty()) issnDoiIsbn = PlainTextValue::text(entry[Entry::ftISSN]); lineEditISSNDOIISBN->setText(issnDoiIsbn); QString publication = PlainTextValue::text(entry[Entry::ftJournal]); if (publication.isEmpty()) publication = PlainTextValue::text(entry[Entry::ftBookTitle]); lineEditPublication->setText(publication); // TODO lineEditAbstractKeywords->setText(QString()); } void saveState() { KConfigGroup configGroup(d->config, configGroupName); configGroup.writeEntry(QStringLiteral("fullText"), lineEditFullText->text()); configGroup.writeEntry(QStringLiteral("title"), lineEditTitle->text()); 
configGroup.writeEntry(QStringLiteral("author"), lineEditAuthor->text()); configGroup.writeEntry(QStringLiteral("abstractKeywords"), lineEditAbstractKeywords->text()); configGroup.writeEntry(QStringLiteral("publication"), lineEditPublication->text()); configGroup.writeEntry(QStringLiteral("ISSNDOIISBN"), lineEditISSNDOIISBN->text()); configGroup.writeEntry(QStringLiteral("volume"), lineEditVolume->text()); configGroup.writeEntry(QStringLiteral("issue"), lineEditIssue->text()); configGroup.writeEntry(QStringLiteral("numResults"), numResultsField->value()); d->config->sync(); } }; #endif // HAVE_QTWIDGETS class OnlineSearchIngentaConnect::OnlineSearchIngentaConnectPrivate { private: const QString ingentaConnectBaseUrl; public: #ifdef HAVE_QTWIDGETS OnlineSearchIngentaConnect::Form *form; #endif // HAVE_QTWIDGETS OnlineSearchIngentaConnectPrivate(OnlineSearchIngentaConnect *) : ingentaConnectBaseUrl(QStringLiteral("https://www.ingentaconnect.com/search?format=bib")) #ifdef HAVE_QTWIDGETS , form(nullptr) #endif // HAVE_QTWIDGETS { /// nothing } #ifdef HAVE_QTWIDGETS QUrl buildQueryUrl() { if (form == nullptr) { qCWarning(LOG_KBIBTEX_NETWORKING) << "Cannot build query url if no form is specified"; return QUrl(); } QUrl queryUrl(ingentaConnectBaseUrl); QUrlQuery query(queryUrl); int index = 1; const QStringList chunksFullText = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditFullText->text()); for (const QString &chunk : chunksFullText) { if (index > 1) query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); ///< join search terms with an AND operation query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("fulltext")); query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksAuthor = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditAuthor->text()); for (const QString &chunk : chunksAuthor) { if (index > 1) 
query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("author")); query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksTitle = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditTitle->text()); for (const QString &chunk : chunksTitle) { if (index > 1) query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("title")); query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksPublication = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditPublication->text()); for (const QString &chunk : chunksPublication) { if (index > 1) query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("journalbooktitle")); query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksIssue = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditIssue->text()); for (const QString &chunk : chunksIssue) { if (index > 1) query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("issue")); query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksVolume = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditVolume->text()); for (const QString &chunk : chunksVolume) { if (index > 1) query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("volume")); 
query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksKeywords = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditAbstractKeywords->text()); for (const QString &chunk : chunksKeywords) { if (index > 1) query.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); query.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("tka")); query.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } query.addQueryItem(QStringLiteral("pageSize"), QString::number(form->numResultsField->value())); query.addQueryItem(QStringLiteral("sortDescending"), QStringLiteral("true")); query.addQueryItem(QStringLiteral("subscribed"), QStringLiteral("false")); query.addQueryItem(QStringLiteral("sortField"), QStringLiteral("default")); queryUrl.setQuery(query); return queryUrl; } #endif // HAVE_QTWIDGETS QUrl buildQueryUrl(const QMap<QString, QString> &query, int numResults) { QUrl queryUrl(ingentaConnectBaseUrl); QUrlQuery q(queryUrl); int index = 1; const QStringList chunksFreeText = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyFreeText]); for (const QString &chunk : chunksFreeText) { if (index > 1) q.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); q.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("fulltext")); q.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksAuthor = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyAuthor]); for (const QString &chunk : chunksAuthor) { if (index > 1) q.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); q.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("author")); q.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } const QStringList chunksTitle = 
OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyTitle]); for (const QString &chunk : chunksTitle) { if (index > 1) q.addQueryItem(QString(QStringLiteral("operator%1")).arg(index), QStringLiteral("AND")); q.addQueryItem(QString(QStringLiteral("option%1")).arg(index), QStringLiteral("title")); q.addQueryItem(QString(QStringLiteral("value%1")).arg(index), chunk); ++index; } /// Field "year" not supported in IngentaConnect's search q.addQueryItem(QStringLiteral("pageSize"), QString::number(numResults)); q.addQueryItem(QStringLiteral("sortDescending"), QStringLiteral("true")); q.addQueryItem(QStringLiteral("subscribed"), QStringLiteral("false")); q.addQueryItem(QStringLiteral("sortField"), QStringLiteral("default")); queryUrl.setQuery(q); return queryUrl; } }; OnlineSearchIngentaConnect::OnlineSearchIngentaConnect(QObject *parent) : OnlineSearchAbstract(parent), d(new OnlineSearchIngentaConnectPrivate(this)) { /// nothing } OnlineSearchIngentaConnect::~OnlineSearchIngentaConnect() { delete d; } void OnlineSearchIngentaConnect::startSearch(const QMap<QString, QString> &query, int numResults) { m_hasBeenCanceled = false; emit progress(curStep = 0, numSteps = 1); QNetworkRequest request(d->buildQueryUrl(query, numResults)); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchIngentaConnect::downloadDone); refreshBusyProperty(); } #ifdef HAVE_QTWIDGETS void OnlineSearchIngentaConnect::startSearchFromForm() { m_hasBeenCanceled = false; emit progress(curStep = 0, numSteps = 1); QNetworkRequest request(d->buildQueryUrl()); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchIngentaConnect::downloadDone); d->form->saveState(); refreshBusyProperty(); 
} #endif // HAVE_QTWIDGETS QString OnlineSearchIngentaConnect::label() const { #ifdef HAVE_KF5 return i18n("IngentaConnect"); #else // HAVE_KF5 //= onlinesearch-ingentaconnect-label return QObject::tr("IngentaConnect"); #endif // HAVE_KF5 } QString OnlineSearchIngentaConnect::favIconUrl() const { - return QStringLiteral("http://www.ingentaconnect.com/favicon.ico"); + return QStringLiteral("https://www.ingentaconnect.com/favicon.ico"); } #ifdef HAVE_QTWIDGETS OnlineSearchAbstract::Form *OnlineSearchIngentaConnect::customWidget(QWidget *parent) { if (d->form == nullptr) d->form = new OnlineSearchIngentaConnect::Form(parent); return d->form; } #endif // HAVE_QTWIDGETS QUrl OnlineSearchIngentaConnect::homepage() const { return QUrl(QStringLiteral("https://www.ingentaconnect.com/")); } void OnlineSearchIngentaConnect::downloadDone() { emit progress(curStep = numSteps, numSteps); QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); if (handleErrors(reply)) { /// ensure proper treatment of UTF-8 characters QString bibTeXcode = QString::fromUtf8(reply->readAll().constData()); if (!bibTeXcode.isEmpty()) { FileImporterBibTeX importer(this); File *bibtexFile = importer.fromString(bibTeXcode); bool hasEntries = false; if (bibtexFile != nullptr) { for (const auto &element : const_cast<const File &>(*bibtexFile)) { QSharedPointer<Entry> entry = element.dynamicCast<Entry>(); hasEntries |= publishEntry(entry); } stopSearch(resultNoError); delete bibtexFile; } else { qCWarning(LOG_KBIBTEX_NETWORKING) << "No valid BibTeX file results returned on request on" << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); stopSearch(resultUnspecifiedError); } } else { /// returned file is empty stopSearch(resultNoError); } } refreshBusyProperty(); } #include "onlinesearchingentaconnect.moc" diff --git a/src/networking/onlinesearch/onlinesearchinspirehep.cpp b/src/networking/onlinesearch/onlinesearchinspirehep.cpp index 1ae81681..2ce65622 100644 --- 
a/src/networking/onlinesearch/onlinesearchinspirehep.cpp +++ b/src/networking/onlinesearch/onlinesearchinspirehep.cpp @@ -1,83 +1,83 @@ /*************************************************************************** * Copyright (C) 2004-2017 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "onlinesearchinspirehep.h" #include <KLocalizedString> OnlineSearchInspireHep::OnlineSearchInspireHep(QObject *parent) : OnlineSearchSimpleBibTeXDownload(parent) { /// nothing } QString OnlineSearchInspireHep::label() const { return i18n("Inspire High-Energy Physics Literature Database"); } QUrl OnlineSearchInspireHep::homepage() const { - return QUrl(QStringLiteral("http://inspirehep.net/")); + return QUrl(QStringLiteral("https://inspirehep.net/")); } QString OnlineSearchInspireHep::favIconUrl() const { - return QStringLiteral("http://inspirehep.net/favicon.ico"); + return QStringLiteral("https://inspirehep.net/favicon.ico"); } QUrl OnlineSearchInspireHep::buildQueryUrl(const QMap<QString, QString> &query, int numResults) { static const QString typedSearch = QStringLiteral("%1 %2"); ///< no quotation marks for search term? 
const QStringList freeTextWords = splitRespectingQuotationMarks(query[queryKeyFreeText]); const QStringList yearWords = splitRespectingQuotationMarks(query[queryKeyYear]); const QStringList titleWords = splitRespectingQuotationMarks(query[queryKeyTitle]); const QStringList authorWords = splitRespectingQuotationMarks(query[queryKeyAuthor]); /// append search terms QStringList queryFragments; queryFragments.reserve(freeTextWords.size() + yearWords.size() + titleWords.size() + authorWords.size()); /// add words from "free text" field for (const QString &text : freeTextWords) queryFragments.append(typedSearch.arg(QStringLiteral("ft"), text)); /// add words from "year" field for (const QString &text : yearWords) queryFragments.append(typedSearch.arg(QStringLiteral("d"), text)); /// add words from "title" field for (const QString &text : titleWords) queryFragments.append(typedSearch.arg(QStringLiteral("t"), text)); /// add words from "author" field for (const QString &text : authorWords) queryFragments.append(typedSearch.arg(QStringLiteral("a"), text)); /// Build URL - QString urlText = QStringLiteral("http://inspirehep.net/search?ln=en&ln=en&of=hx&action_search=Search&sf=&so=d&rm=&sc=0"); + QString urlText = QStringLiteral("https://inspirehep.net/search?ln=en&ln=en&of=hx&action_search=Search&sf=&so=d&rm=&sc=0"); /// Set number of expected results urlText.append(QString(QStringLiteral("&rg=%1")).arg(numResults)); /// Append actual query urlText.append(QStringLiteral("&p=")); urlText.append(queryFragments.join(QStringLiteral(" and "))); /// URL-encode text urlText = urlText.replace(QLatin1Char(' '), QStringLiteral("%20")).replace(QLatin1Char('"'), QStringLiteral("%22")); return QUrl(urlText); } diff --git a/src/networking/onlinesearch/onlinesearchmathscinet.cpp b/src/networking/onlinesearch/onlinesearchmathscinet.cpp index 4cf85a5f..0bbd7a2b 100644 --- a/src/networking/onlinesearch/onlinesearchmathscinet.cpp +++ b/src/networking/onlinesearch/onlinesearchmathscinet.cpp @@ 
-1,258 +1,258 @@ /*************************************************************************** * Copyright (C) 2004-2018 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * 2018 Alexander Dunlap <alexander.dunlap@gmail.com> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "onlinesearchmathscinet.h" #include <QNetworkReply> #include <QNetworkRequest> #include <QNetworkAccessManager> #include <QMap> #include <QUrlQuery> #include <QRegularExpression> #include <KLocalizedString> #include <KBibTeX> #include <FileImporterBibTeX> #include "internalnetworkaccessmanager.h" #include "logging_networking.h" class OnlineSearchMathSciNet::OnlineSearchMathSciNetPrivate { public: QMap<QString, QString> queryParameters; int numResults; static const QString queryFormUrl; static const QString queryUrlStem; OnlineSearchMathSciNetPrivate(OnlineSearchMathSciNet *parent) : numResults(0) { Q_UNUSED(parent) } }; const QString OnlineSearchMathSciNet::OnlineSearchMathSciNetPrivate::queryFormUrl = QStringLiteral("https://mathscinet.ams.org/mathscinet/"); const QString OnlineSearchMathSciNet::OnlineSearchMathSciNetPrivate::queryUrlStem = QStringLiteral("https://mathscinet.ams.org/mathscinet/search/publications.html?client=KBibTeX"); OnlineSearchMathSciNet::OnlineSearchMathSciNet(QObject *parent) : 
OnlineSearchAbstract(parent), d(new OnlineSearchMathSciNetPrivate(this)) { /// nothing } OnlineSearchMathSciNet::~OnlineSearchMathSciNet() { delete d; } void OnlineSearchMathSciNet::startSearch(const QMap<QString, QString> &query, int numResults) { m_hasBeenCanceled = false; emit progress(curStep = 0, numSteps = 3); d->queryParameters.clear(); d->numResults = qMin(50, numResults); /// limit query to max 50 elements int index = 1; const QString freeText = query[queryKeyFreeText]; const QStringList elementsFreeText = splitRespectingQuotationMarks(freeText); for (const QString &element : elementsFreeText) { d->queryParameters.insert(QString(QStringLiteral("pg%1")).arg(index), QStringLiteral("ALLF")); d->queryParameters.insert(QString(QStringLiteral("s%1")).arg(index), element); ++index; } const QString title = query[queryKeyTitle]; const QStringList elementsTitle = splitRespectingQuotationMarks(title); for (const QString &element : elementsTitle) { d->queryParameters.insert(QString(QStringLiteral("pg%1")).arg(index), QStringLiteral("TI")); d->queryParameters.insert(QString(QStringLiteral("s%1")).arg(index), element); ++index; } const QString authors = query[queryKeyAuthor]; const QStringList elementsAuthor = splitRespectingQuotationMarks(authors); for (const QString &element : elementsAuthor) { d->queryParameters.insert(QString(QStringLiteral("pg%1")).arg(index), QStringLiteral("ICN")); d->queryParameters.insert(QString(QStringLiteral("s%1")).arg(index), element); ++index; } const QString year = query[queryKeyYear]; if (year.isEmpty()) { d->queryParameters.insert(QStringLiteral("dr"), QStringLiteral("all")); } else { d->queryParameters.insert(QStringLiteral("dr"), QStringLiteral("pubyear")); d->queryParameters.insert(QStringLiteral("yrop"), QStringLiteral("eq")); d->queryParameters.insert(QStringLiteral("arg3"), year); } /// issue request for start page QNetworkRequest request(OnlineSearchMathSciNetPrivate::queryFormUrl); QNetworkReply *reply = 
InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchMathSciNet::doneFetchingQueryForm); refreshBusyProperty(); } QString OnlineSearchMathSciNet::label() const { return i18n("MathSciNet"); } QString OnlineSearchMathSciNet::favIconUrl() const { - return QStringLiteral("http://www.ams.org/favicon.ico"); + return QStringLiteral("https://www.ams.org/favicon.ico"); } QUrl OnlineSearchMathSciNet::homepage() const { return QUrl(QStringLiteral("https://mathscinet.ams.org/mathscinet/help/about.html")); } void OnlineSearchMathSciNet::doneFetchingQueryForm() { QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); emit progress(++curStep, numSteps); if (handleErrors(reply)) { // UNUSED const QString htmlText = QString::fromUtf8(reply->readAll().constData()); /// extract form's parameters ... QMap<QString, QString> formParams; /// ... and overwrite them with the query's parameters for (QMap<QString, QString>::ConstIterator it = d->queryParameters.constBegin(); it != d->queryParameters.constEnd(); ++it) formParams.insert(it.key(), it.value()); /// build url by appending parameters QUrl url(OnlineSearchMathSciNetPrivate::queryUrlStem); QUrlQuery query(url); for (QMap<QString, QString>::ConstIterator it = formParams.constBegin(); it != formParams.constEnd(); ++it) query.addQueryItem(it.key(), it.value()); for (int i = 1; i <= d->queryParameters.count(); ++i) query.addQueryItem(QString(QStringLiteral("co%1")).arg(i), QStringLiteral("AND")); ///< join search terms with an AND operation url.setQuery(query); /// issue request for result page QNetworkRequest request(url); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchMathSciNet::doneFetchingResultPage); } 
refreshBusyProperty(); } void OnlineSearchMathSciNet::doneFetchingResultPage() { QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); emit progress(++curStep, numSteps); if (handleErrors(reply)) { const QString htmlText = QString::fromUtf8(reply->readAll().constData()); /// extract form's parameters ... QMap<QString, QString> formParams = formParameters(htmlText, htmlText.indexOf(QStringLiteral("<form name=\"batchDownload\" action="), Qt::CaseInsensitive)); /// build url by appending parameters QUrl url(OnlineSearchMathSciNetPrivate::queryUrlStem); QUrlQuery query(url); static const QStringList copyParameters {QStringLiteral("foo"), QStringLiteral("bdl"), QStringLiteral("reqargs"), QStringLiteral("batch_title")}; for (const QString &param : copyParameters) { query.addQueryItem(param, formParams[param]); } query.addQueryItem(QStringLiteral("fmt"), QStringLiteral("bibtex")); int count = 0; static const QRegularExpression checkBoxRegExp(QStringLiteral("<input class=\"hlCheckBox\" type=\"checkbox\" name=\"b\" value=\"(\\d+)\"")); QRegularExpressionMatchIterator checkBoxRegExpMatchIt = checkBoxRegExp.globalMatch(htmlText); while (count < d->numResults && checkBoxRegExpMatchIt.hasNext()) { const QRegularExpressionMatch checkBoxRegExpMatch = checkBoxRegExpMatchIt.next(); query.addQueryItem(QStringLiteral("b"), checkBoxRegExpMatch.captured(1)); ++count; } url.setQuery(query); if (count > 0) { /// issue request for bibtex code QNetworkRequest request(url); QNetworkReply *newReply = InternalNetworkAccessManager::instance().get(request, reply); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(newReply); connect(newReply, &QNetworkReply::finished, this, &OnlineSearchMathSciNet::doneFetchingBibTeXcode); } else { /// nothing found stopSearch(resultNoError); } } refreshBusyProperty(); } void OnlineSearchMathSciNet::doneFetchingBibTeXcode() { QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); emit progress(curStep = numSteps, numSteps); if 
(handleErrors(reply)) { /// ensure proper treatment of UTF-8 characters QString htmlCode = QString::fromUtf8(reply->readAll().constData()); QString bibtexCode; int p1 = -1, p2 = -1; while ((p1 = htmlCode.indexOf(QStringLiteral("<pre>"), p2 + 1)) >= 0 && (p2 = htmlCode.indexOf(QStringLiteral("</pre>"), p1 + 1)) >= 0) { bibtexCode += htmlCode.midRef(p1 + 5, p2 - p1 - 5); bibtexCode += QLatin1Char('\n'); } FileImporterBibTeX importer(this); const File *bibtexFile = importer.fromString(bibtexCode); bool hasEntry = false; if (bibtexFile != nullptr) { for (const auto &element : *bibtexFile) { QSharedPointer<Entry> entry = element.dynamicCast<Entry>(); hasEntry |= publishEntry(entry); } delete bibtexFile; } stopSearch(hasEntry ? resultNoError : resultUnspecifiedError); } refreshBusyProperty(); } void OnlineSearchMathSciNet::sanitizeEntry(QSharedPointer<Entry> entry) { OnlineSearchAbstract::sanitizeEntry(entry); const QString ftFJournal = QStringLiteral("fjournal"); if (entry->contains(ftFJournal)) { Value v = entry->value(ftFJournal); entry->remove(Entry::ftJournal); entry->remove(ftFJournal); entry->insert(Entry::ftJournal, v); } } diff --git a/src/networking/onlinesearch/onlinesearchmrlookup.cpp b/src/networking/onlinesearch/onlinesearchmrlookup.cpp index 44aa6a11..81a97ad7 100644 --- a/src/networking/onlinesearch/onlinesearchmrlookup.cpp +++ b/src/networking/onlinesearch/onlinesearchmrlookup.cpp @@ -1,143 +1,143 @@ /*************************************************************************** * Copyright (C) 2004-2018 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * 2014 Pavel Zorin-Kranich <pzorin@math.uni-bonn.de> * * 2018 Alexander Dunlap <alexander.dunlap@gmail.com> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "onlinesearchmrlookup.h" #include <QNetworkReply> #include <QNetworkRequest> #include <QNetworkAccessManager> #include <QMap> #include <QUrlQuery> #include <KLocalizedString> #include <KBibTeX> #include <FileImporterBibTeX> #include "internalnetworkaccessmanager.h" #include "logging_networking.h" const QString OnlineSearchMRLookup::queryUrlStem = QStringLiteral("https://mathscinet.ams.org/mrlookup"); OnlineSearchMRLookup::OnlineSearchMRLookup(QObject *parent) : OnlineSearchAbstract(parent) { /// nothing } void OnlineSearchMRLookup::startSearch(const QMap<QString, QString> &query, int) { m_hasBeenCanceled = false; emit progress(curStep = 0, numSteps = 1); QUrl url(queryUrlStem); QUrlQuery q(url); const QString title = query[queryKeyTitle]; q.addQueryItem(QStringLiteral("ti"), title); const QString authors = query[queryKeyAuthor]; q.addQueryItem(QStringLiteral("au"), authors); const QString year = query[queryKeyYear]; if (!year.isEmpty()) q.addQueryItem(QStringLiteral("year"), year); q.addQueryItem(QStringLiteral("format"), QStringLiteral("bibtex")); url.setQuery(q); QNetworkRequest request(url); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchMRLookup::doneFetchingResultPage); refreshBusyProperty(); } QString OnlineSearchMRLookup::label() const { return i18n("MR Lookup"); } QString OnlineSearchMRLookup::favIconUrl() const { - 
return QStringLiteral("http://www.ams.org/favicon.ico"); + return QStringLiteral("https://www.ams.org/favicon.ico"); } QUrl OnlineSearchMRLookup::homepage() const { return QUrl(QStringLiteral("https://mathscinet.ams.org/mrlookup")); } void OnlineSearchMRLookup::doneFetchingResultPage() { QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); emit progress(curStep = numSteps, numSteps); if (handleErrors(reply)) { /// ensure proper treatment of UTF-8 characters QString htmlCode = QString::fromUtf8(reply->readAll().constData()); QString bibtexCode; int p1 = -1, p2 = -1; while ((p1 = htmlCode.indexOf(QStringLiteral("<pre>"), p2 + 1)) >= 0 && (p2 = htmlCode.indexOf(QStringLiteral("</pre>"), p1 + 1)) >= 0) { bibtexCode += htmlCode.midRef(p1 + 5, p2 - p1 - 5); bibtexCode += QLatin1Char('\n'); } FileImporterBibTeX importer(this); File *bibtexFile = importer.fromString(bibtexCode); bool hasEntry = false; if (bibtexFile != nullptr) { for (const auto &element : const_cast<const File &>(*bibtexFile)) { const QSharedPointer<Entry> entry = element.dynamicCast<Entry>(); hasEntry |= publishEntry(entry); } delete bibtexFile; } stopSearch(hasEntry ? 
resultNoError : resultUnspecifiedError); } refreshBusyProperty(); } void OnlineSearchMRLookup::sanitizeEntry(QSharedPointer<Entry> entry) { OnlineSearchAbstract::sanitizeEntry(entry); /// Rewrite 'fjournal' fields to become 'journal' fields /// (overwriting them if necessary) const QString ftFJournal = QStringLiteral("fjournal"); if (entry->contains(ftFJournal)) { Value v = entry->value(ftFJournal); entry->remove(Entry::ftJournal); entry->remove(ftFJournal); entry->insert(Entry::ftJournal, v); } /// Remove URL from entry if contains a DOI and the DOI field is present if (entry->contains(Entry::ftDOI) && entry->contains(Entry::ftUrl)) { Value v = entry->value(Entry::ftUrl); if (v.containsPattern(QStringLiteral("http://dx.doi.org"))) { entry->remove(Entry::ftUrl); } } } diff --git a/src/networking/onlinesearch/onlinesearchspringerlink.cpp b/src/networking/onlinesearch/onlinesearchspringerlink.cpp index 5d545b55..556f487f 100644 --- a/src/networking/onlinesearch/onlinesearchspringerlink.cpp +++ b/src/networking/onlinesearch/onlinesearchspringerlink.cpp @@ -1,358 +1,358 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. 
* ***************************************************************************/ #include "onlinesearchspringerlink.h" #ifdef HAVE_QTWIDGETS #include <QFormLayout> #include <QSpinBox> #include <QLineEdit> #include <QLabel> #endif // HAVE_QTWIDGETS #include <QRegularExpression> #include <QNetworkRequest> #include <QNetworkAccessManager> #include <QNetworkReply> #include <QUrlQuery> #ifdef HAVE_KF5 #include <KLocalizedString> #include <KConfigGroup> #else // HAVE_KF5 #define i18n(text) QObject::tr(text) #endif // HAVE_KF5 #include <Encoder> #include <EncoderXML> #include <FileImporterBibTeX> #include <XSLTransform> #include "internalnetworkaccessmanager.h" #include "onlinesearchabstract_p.h" #include "logging_networking.h" #ifdef HAVE_QTWIDGETS /** * @author Thomas Fischer <fischer@unix-ag.uni-kl.de> */ class OnlineSearchSpringerLink::Form : public OnlineSearchAbstract::Form { Q_OBJECT private: QString configGroupName; void loadState() { KConfigGroup configGroup(d->config, configGroupName); lineEditFreeText->setText(configGroup.readEntry(QStringLiteral("free"), QString())); lineEditTitle->setText(configGroup.readEntry(QStringLiteral("title"), QString())); lineEditBookTitle->setText(configGroup.readEntry(QStringLiteral("bookTitle"), QString())); lineEditAuthorEditor->setText(configGroup.readEntry(QStringLiteral("authorEditor"), QString())); lineEditYear->setText(configGroup.readEntry(QStringLiteral("year"), QString())); numResultsField->setValue(configGroup.readEntry(QStringLiteral("numResults"), 10)); } public: QLineEdit *lineEditFreeText, *lineEditTitle, *lineEditBookTitle, *lineEditAuthorEditor, *lineEditYear; QSpinBox *numResultsField; Form(QWidget *parent) : OnlineSearchAbstract::Form(parent), configGroupName(QStringLiteral("Search Engine SpringerLink")) { QFormLayout *layout = new QFormLayout(this); layout->setMargin(0); lineEditFreeText = new QLineEdit(this); lineEditFreeText->setClearButtonEnabled(true); QLabel *label = new QLabel(i18n("Free Text:"), this); 
label->setBuddy(lineEditFreeText); layout->addRow(label, lineEditFreeText); connect(lineEditFreeText, &QLineEdit::returnPressed, this, &OnlineSearchSpringerLink::Form::returnPressed); lineEditTitle = new QLineEdit(this); lineEditTitle->setClearButtonEnabled(true); label = new QLabel(i18n("Title:"), this); label->setBuddy(lineEditTitle); layout->addRow(label, lineEditTitle); connect(lineEditTitle, &QLineEdit::returnPressed, this, &OnlineSearchSpringerLink::Form::returnPressed); lineEditBookTitle = new QLineEdit(this); lineEditBookTitle->setClearButtonEnabled(true); label = new QLabel(i18n("Book/Journal title:"), this); label->setBuddy(lineEditBookTitle); layout->addRow(label, lineEditBookTitle); connect(lineEditBookTitle, &QLineEdit::returnPressed, this, &OnlineSearchSpringerLink::Form::returnPressed); lineEditAuthorEditor = new QLineEdit(this); lineEditAuthorEditor->setClearButtonEnabled(true); label = new QLabel(i18n("Author or Editor:"), this); label->setBuddy(lineEditAuthorEditor); layout->addRow(label, lineEditAuthorEditor); connect(lineEditAuthorEditor, &QLineEdit::returnPressed, this, &OnlineSearchSpringerLink::Form::returnPressed); lineEditYear = new QLineEdit(this); lineEditYear->setClearButtonEnabled(true); label = new QLabel(i18n("Year:"), this); label->setBuddy(lineEditYear); layout->addRow(label, lineEditYear); connect(lineEditYear, &QLineEdit::returnPressed, this, &OnlineSearchSpringerLink::Form::returnPressed); numResultsField = new QSpinBox(this); label = new QLabel(i18n("Number of Results:"), this); label->setBuddy(numResultsField); layout->addRow(label, numResultsField); numResultsField->setMinimum(3); numResultsField->setMaximum(100); lineEditFreeText->setFocus(Qt::TabFocusReason); loadState(); } bool readyToStart() const override { return !(lineEditFreeText->text().isEmpty() && lineEditTitle->text().isEmpty() && lineEditBookTitle->text().isEmpty() && lineEditAuthorEditor->text().isEmpty()); } void copyFromEntry(const Entry &entry) override { 
lineEditTitle->setText(PlainTextValue::text(entry[Entry::ftTitle])); QString bookTitle = PlainTextValue::text(entry[Entry::ftBookTitle]); if (bookTitle.isEmpty()) bookTitle = PlainTextValue::text(entry[Entry::ftJournal]); lineEditBookTitle->setText(bookTitle); lineEditAuthorEditor->setText(d->authorLastNames(entry).join(QStringLiteral(" "))); } void saveState() { KConfigGroup configGroup(d->config, configGroupName); configGroup.writeEntry(QStringLiteral("free"), lineEditFreeText->text()); configGroup.writeEntry(QStringLiteral("title"), lineEditTitle->text()); configGroup.writeEntry(QStringLiteral("bookTitle"), lineEditBookTitle->text()); configGroup.writeEntry(QStringLiteral("authorEditor"), lineEditAuthorEditor->text()); configGroup.writeEntry(QStringLiteral("year"), lineEditYear->text()); configGroup.writeEntry(QStringLiteral("numResults"), numResultsField->value()); d->config->sync(); } }; #endif // HAVE_QTWIDGETS class OnlineSearchSpringerLink::OnlineSearchSpringerLinkPrivate { private: static const QString xsltFilenameBase; public: static const QString springerMetadataKey; const XSLTransform xslt; #ifdef HAVE_QTWIDGETS OnlineSearchSpringerLink::Form *form; #endif // HAVE_QTWIDGETS OnlineSearchSpringerLinkPrivate(OnlineSearchSpringerLink *) : xslt(XSLTransform::locateXSLTfile(xsltFilenameBase)) #ifdef HAVE_QTWIDGETS , form(nullptr) #endif // HAVE_QTWIDGETS { if (!xslt.isValid()) qCWarning(LOG_KBIBTEX_NETWORKING) << "Failed to initialize XSL transformation based on file '" << xsltFilenameBase << "'"; } #ifdef HAVE_QTWIDGETS QUrl buildQueryUrl() { if (form == nullptr) return QUrl(); - QUrl queryUrl = QUrl(QString(QStringLiteral("http://api.springer.com/metadata/pam/?api_key=")).append(springerMetadataKey)); + QUrl queryUrl = QUrl(QString(QStringLiteral("https://api.springer.com/metadata/pam/?api_key=")).append(springerMetadataKey)); QString queryString = form->lineEditFreeText->text(); const QStringList titleChunks = 
OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditTitle->text()); for (const QString &titleChunk : titleChunks) { queryString += QString(QStringLiteral(" title:%1")).arg(Encoder::instance().convertToPlainAscii(titleChunk)); } const QStringList bookTitleChunks = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditBookTitle->text()); for (const QString &titleChunk : bookTitleChunks) { queryString += QString(QStringLiteral(" ( journal:%1 OR book:%1 )")).arg(Encoder::instance().convertToPlainAscii(titleChunk)); } const QStringList authors = OnlineSearchAbstract::splitRespectingQuotationMarks(form->lineEditAuthorEditor->text()); for (const QString &author : authors) { queryString += QString(QStringLiteral(" name:%1")).arg(Encoder::instance().convertToPlainAscii(author)); } const QString year = form->lineEditYear->text(); if (!year.isEmpty()) queryString += QString(QStringLiteral(" year:%1")).arg(year); queryString = queryString.simplified(); QUrlQuery query(queryUrl); query.addQueryItem(QStringLiteral("q"), queryString); queryUrl.setQuery(query); return queryUrl; } #endif // HAVE_QTWIDGETS QUrl buildQueryUrl(const QMap<QString, QString> &query) { - QUrl queryUrl = QUrl(QString(QStringLiteral("http://api.springer.com/metadata/pam/?api_key=")).append(springerMetadataKey)); + QUrl queryUrl = QUrl(QString(QStringLiteral("https://api.springer.com/metadata/pam/?api_key=")).append(springerMetadataKey)); QString queryString = query[queryKeyFreeText]; const QStringList titleChunks = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyTitle]); for (const QString &titleChunk : titleChunks) { queryString += QString(QStringLiteral(" title:%1")).arg(Encoder::instance().convertToPlainAscii(titleChunk)); } const QStringList authors = OnlineSearchAbstract::splitRespectingQuotationMarks(query[queryKeyAuthor]); for (const QString &author : authors) { queryString += QString(QStringLiteral(" 
name:%1")).arg(Encoder::instance().convertToPlainAscii(author)); } QString year = query[queryKeyYear]; if (!year.isEmpty()) { static const QRegularExpression yearRegExp("\\b(18|19|20)[0-9]{2}\\b"); const QRegularExpressionMatch yearRegExpMatch = yearRegExp.match(year); if (yearRegExpMatch.hasMatch()) { year = yearRegExpMatch.captured(0); queryString += QString(QStringLiteral(" year:%1")).arg(year); } } queryString = queryString.simplified(); QUrlQuery q(queryUrl); q.addQueryItem(QStringLiteral("q"), queryString); queryUrl.setQuery(q); return queryUrl; } }; const QString OnlineSearchSpringerLink::OnlineSearchSpringerLinkPrivate::xsltFilenameBase = QStringLiteral("pam2bibtex.xsl"); const QString OnlineSearchSpringerLink::OnlineSearchSpringerLinkPrivate::springerMetadataKey(InternalNetworkAccessManager::reverseObfuscate("\xce\xb8\x4d\x2c\x8d\xba\xa9\xc4\x61\x9\x58\x6c\xbb\xde\x86\xb5\xb1\xc6\x15\x71\x76\x45\xd\x79\x12\x65\x95\xe1\x5d\x2f\x1d\x24\x10\x72\x2a\x5e\x69\x4\xdc\xba\xab\xc3\x28\x58\x8a\xfa\x5e\x69")); OnlineSearchSpringerLink::OnlineSearchSpringerLink(QObject *parent) : OnlineSearchAbstract(parent), d(new OnlineSearchSpringerLink::OnlineSearchSpringerLinkPrivate(this)) { /// nothing } OnlineSearchSpringerLink::~OnlineSearchSpringerLink() { delete d; } #ifdef HAVE_QTWIDGETS void OnlineSearchSpringerLink::startSearchFromForm() { m_hasBeenCanceled = false; emit progress(curStep = 0, numSteps = 1); QUrl springerLinkSearchUrl = d->buildQueryUrl(); QNetworkRequest request(springerLinkSearchUrl); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchSpringerLink::doneFetchingPAM); if (d->form != nullptr) d->form->saveState(); refreshBusyProperty(); } #endif // HAVE_QTWIDGETS void OnlineSearchSpringerLink::startSearch(const QMap<QString, QString> &query, int numResults) { m_hasBeenCanceled = false; QUrl 
springerLinkSearchUrl = d->buildQueryUrl(query); QUrlQuery q(springerLinkSearchUrl); q.addQueryItem(QStringLiteral("p"), QString::number(numResults)); springerLinkSearchUrl.setQuery(q); emit progress(curStep = 0, numSteps = 1); QNetworkRequest request(springerLinkSearchUrl); QNetworkReply *reply = InternalNetworkAccessManager::instance().get(request); InternalNetworkAccessManager::instance().setNetworkReplyTimeout(reply); connect(reply, &QNetworkReply::finished, this, &OnlineSearchSpringerLink::doneFetchingPAM); refreshBusyProperty(); } QString OnlineSearchSpringerLink::label() const { #ifdef HAVE_KF5 return i18n("SpringerLink"); #else // HAVE_KF5 //= onlinesearch-springerlink-label return QObject::tr("SpringerLink"); #endif // HAVE_KF5 } QString OnlineSearchSpringerLink::favIconUrl() const { - return QStringLiteral("http://link.springer.com/static/0.6623/sites/link/images/favicon.ico"); + return QStringLiteral("https://link.springer.com/static/0.6623/sites/link/images/favicon.ico"); } #ifdef HAVE_QTWIDGETS OnlineSearchAbstract::Form *OnlineSearchSpringerLink::customWidget(QWidget *parent) { if (d->form == nullptr) d->form = new Form(parent); return d->form; } #endif // HAVE_QTWIDGETS QUrl OnlineSearchSpringerLink::homepage() const { - return QUrl(QStringLiteral("http://www.springerlink.com/")); + return QUrl(QStringLiteral("https://link.springer.com/")); } void OnlineSearchSpringerLink::doneFetchingPAM() { QNetworkReply *reply = static_cast<QNetworkReply *>(sender()); if (handleErrors(reply)) { /// ensure proper treatment of UTF-8 characters const QString xmlSource = QString::fromUtf8(reply->readAll().constData()); const QString bibTeXcode = EncoderXML::instance().decode(d->xslt.transform(xmlSource).remove(QStringLiteral("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"))); if (bibTeXcode.isEmpty()) { - qCWarning(LOG_KBIBTEX_NETWORKING) << "XSL tranformation failed for data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); + 
qCWarning(LOG_KBIBTEX_NETWORKING) << "XSL transformation failed for data from " << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); stopSearch(resultInvalidArguments); } else { FileImporterBibTeX importer(this); const File *bibtexFile = importer.fromString(bibTeXcode); bool hasEntries = false; if (bibtexFile != nullptr) { for (const QSharedPointer<Element> &element : *bibtexFile) { QSharedPointer<Entry> entry = element.dynamicCast<Entry>(); hasEntries |= publishEntry(entry); } stopSearch(resultNoError); delete bibtexFile; } else { qCWarning(LOG_KBIBTEX_NETWORKING) << "No valid BibTeX file results returned on request on" << InternalNetworkAccessManager::removeApiKey(reply->url()).toDisplayString(); stopSearch(resultUnspecifiedError); } } } refreshBusyProperty(); } #include "onlinesearchspringerlink.moc" diff --git a/src/networking/onlinesearch/onlinesearchspringerlink.h b/src/networking/onlinesearch/onlinesearchspringerlink.h index 5f9e7fd2..3a0cb60a 100644 --- a/src/networking/onlinesearch/onlinesearchspringerlink.h +++ b/src/networking/onlinesearch/onlinesearchspringerlink.h @@ -1,73 +1,73 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. 
* ***************************************************************************/ #ifndef KBIBTEX_NETWORKING_ONLINESEARCHSPRINGERLINK_H #define KBIBTEX_NETWORKING_ONLINESEARCHSPRINGERLINK_H #include <onlinesearch/OnlineSearchAbstract> #ifdef HAVE_KF5 #include "kbibtexnetworking_export.h" #endif // HAVE_KF5 /** * @author Thomas Fischer <fischer@unix-ag.uni-kl.de> * - * See also: http://dev.springer.com/ + * See also: https://dev.springernature.com/ * * On the subject of having multiple "constraints" (search terms) in * a search, Springer's documentation states: "Each constraint that * appears in your request will automatically be ANDed with all the others * For instance, a request including constraints: "title:bone+name:Jones" * is the equivilent to the request containing constraints concatenated by * the AND operator: "title:bone%20AND%20name:Jones". - * (source: http://dev.springer.com/docs/read/Filters_Facets_and_Constraints) + * (source: https://dev.springernature.com/adding-constraints) */ class KBIBTEXNETWORKING_EXPORT OnlineSearchSpringerLink : public OnlineSearchAbstract { Q_OBJECT public: explicit OnlineSearchSpringerLink(QObject *parent); ~OnlineSearchSpringerLink() override; #ifdef HAVE_QTWIDGETS void startSearchFromForm() override; #endif // HAVE_QTWIDGETS void startSearch(const QMap<QString, QString> &query, int numResults) override; QString label() const override; #ifdef HAVE_QTWIDGETS OnlineSearchAbstract::Form *customWidget(QWidget *parent) override; #endif // HAVE_QTWIDGETS QUrl homepage() const override; protected: QString favIconUrl() const override; private slots: void doneFetchingPAM(); private: #ifdef HAVE_QTWIDGETS class Form; #endif // HAVE_QTWIDGETS class OnlineSearchSpringerLinkPrivate; OnlineSearchSpringerLinkPrivate *d; }; #endif // KBIBTEX_NETWORKING_ONLINESEARCHSPRINGERLINK_H diff --git a/src/processing/bibliographyservice.cpp b/src/processing/bibliographyservice.cpp index 2b0a7a32..27501c8a 100644 --- 
a/src/processing/bibliographyservice.cpp +++ b/src/processing/bibliographyservice.cpp @@ -1,200 +1,200 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "bibliographyservice.h" #include <QCoreApplication> #include <QStandardPaths> #include <QProcess> #include <KSharedConfig> #include <KConfigGroup> #include <KLocalizedString> #include <KMessageBox> class BibliographyService::Private { private: /// Representing configuration file "mimeapps.list" - /// see http://www.freedesktop.org/wiki/Specifications/mime-actions-spec/ + /// see https://www.freedesktop.org/wiki/Specifications/mime-apps-spec/ KSharedConfig::Ptr configXDGMimeAppsList; /// Groups inside "mimeapps.list" KConfigGroup configGroupAddedKDEServiceAssociations, configGroupRemovedKDEServiceAssociations; KConfigGroup configGroupAddedAssociations, configGroupRemovedAssociations; /// Names of .desktop files for KBibTeX (application and KPart) static const QString kbibtexApplicationDesktop; static const QString kbibtexPartDesktop; /// Names of .desktop files for Kate (application and KPart) static const QString kateApplicationDesktop; static const QString katePartDesktop; 
public: QWidget *parentWidget; static const QStringList textBasedMimeTypes; Private(QWidget *w, BibliographyService *parent) : configXDGMimeAppsList(KSharedConfig::openConfig(QStringLiteral("mimeapps.list"), KConfig::NoGlobals, QStandardPaths::ApplicationsLocation)), configGroupAddedKDEServiceAssociations(configXDGMimeAppsList, "Added KDE Service Associations"), configGroupRemovedKDEServiceAssociations(configXDGMimeAppsList, "Removed KDE Service Associations"), configGroupAddedAssociations(configXDGMimeAppsList, "Added Associations"), configGroupRemovedAssociations(configXDGMimeAppsList, "Removed Associations"), parentWidget(w) { Q_UNUSED(parent) } bool setKBibTeXforMimeType(const QString &mimetype, const bool isPlainTextFormat) { /// Check that configuration file is writeable before continuing if (!configXDGMimeAppsList->isConfigWritable(true)) return false; /// Configuration which application to use to open bibliography files QStringList addedAssociations = configGroupAddedAssociations.readXdgListEntry(mimetype, QStringList()); /// Remove KBibTeX from list (will be added later to list's head) addedAssociations.removeAll(kbibtexApplicationDesktop); if (isPlainTextFormat) { /// Remove Kate from list (will be added later to list's second position) addedAssociations.removeAll(kateApplicationDesktop); /// Add Kate to list's head (will turn up as second) addedAssociations.prepend(kateApplicationDesktop); } /// Add KBibTeX to list's head addedAssociations.prepend(kbibtexApplicationDesktop); /// Write out and sync changes configGroupAddedAssociations.writeXdgListEntry(mimetype, addedAssociations); configGroupAddedAssociations.sync(); /// Configuration which part to use to open bibliography files QStringList addedKDEServiceAssociations = configGroupAddedKDEServiceAssociations.readXdgListEntry(mimetype, QStringList()); /// Remove KBibTeX from list (will be added later to list's head) addedKDEServiceAssociations.removeAll(kbibtexPartDesktop); if (isPlainTextFormat) { /// 
Remove Kate from list (will be added later to list's second position) addedKDEServiceAssociations.removeAll(katePartDesktop); /// Add Kate to list's head (will turn up as second) addedKDEServiceAssociations.prepend(katePartDesktop); } /// Add KBibTeX to list's head addedKDEServiceAssociations.prepend(kbibtexPartDesktop); /// Write out and sync changes configGroupAddedKDEServiceAssociations.writeXdgListEntry(mimetype, addedKDEServiceAssociations); configGroupAddedKDEServiceAssociations.sync(); /// Configuration which application NOT to use to open bibliography files QStringList removedAssociations = configGroupRemovedAssociations.readXdgListEntry(mimetype, QStringList()); /// If list of applications not to use is not empty ... if (!removedAssociations.isEmpty()) { /// Remove KBibTeX from list removedAssociations.removeAll(kbibtexApplicationDesktop); if (isPlainTextFormat) /// Remove Kate from list removedAssociations.removeAll(kateApplicationDesktop); if (removedAssociations.isEmpty()) /// Empty lists can be removed from configuration file configGroupRemovedAssociations.deleteEntry(mimetype); else /// Write out updated list configGroupRemovedAssociations.writeXdgListEntry(mimetype, removedAssociations); /// Sync changes configGroupRemovedAssociations.sync(); } /// Configuration which part NOT to use to open bibliography files QStringList removedKDEServiceAssociations = configGroupRemovedKDEServiceAssociations.readXdgListEntry(mimetype, QStringList()); /// If list of parts not to use is not empty ... 
if (!removedKDEServiceAssociations.isEmpty()) { /// Remove KBibTeX part from list removedKDEServiceAssociations.removeAll(kbibtexPartDesktop); if (isPlainTextFormat) /// Remove Kate part from list removedKDEServiceAssociations.removeAll(katePartDesktop); if (removedKDEServiceAssociations.isEmpty()) /// Empty lists can be removed from configuration file configGroupRemovedKDEServiceAssociations.deleteEntry(mimetype); else /// Write out updated list configGroupRemovedKDEServiceAssociations.writeXdgListEntry(mimetype, removedKDEServiceAssociations); /// Sync changes configGroupRemovedKDEServiceAssociations.sync(); } return true; } bool isKBibTeXdefaultForMimeType(const QString &mimetype) const { /// Fetch all four configuration groups const QStringList addedAssociations = configGroupAddedAssociations.readXdgListEntry(mimetype, QStringList()); const QStringList addedKDEServiceAssociations = configGroupAddedKDEServiceAssociations.readXdgListEntry(mimetype, QStringList()); const QStringList removedAssociations = configGroupRemovedAssociations.readXdgListEntry(mimetype, QStringList()); const QStringList removedKDEServiceAssociations = configGroupRemovedKDEServiceAssociations.readXdgListEntry(mimetype, QStringList()); /// KBibTeX is default editor for bibliography if ... 
/// - the list of applications associated to mime type is not empty /// - KBibTeX is head of this list /// - KBibTeX is not named in the list of applications not be used /// - the list of parts associated to mime type is not empty /// - KBibTeX part is head of this list /// - KBibTeX part is not named in the list of parts not be used return !addedAssociations.isEmpty() && addedAssociations.first() == kbibtexApplicationDesktop && !removedAssociations.contains(kbibtexApplicationDesktop) && !addedKDEServiceAssociations.isEmpty() && addedKDEServiceAssociations.first() == kbibtexPartDesktop && !removedKDEServiceAssociations.contains(kbibtexPartDesktop); } }; const QString BibliographyService::Private::kbibtexApplicationDesktop = QStringLiteral("org.kde.kbibtex.desktop"); const QString BibliographyService::Private::kbibtexPartDesktop = QStringLiteral("kbibtexpart.desktop"); const QString BibliographyService::Private::kateApplicationDesktop = QStringLiteral("org.kde.kate.desktop"); const QString BibliographyService::Private::katePartDesktop = QStringLiteral("katepart.desktop"); const QStringList BibliographyService::Private::textBasedMimeTypes { QStringLiteral("text/x-bibtex"), ///< classical BibTeX bibliographies QStringLiteral("application/x-research-info-systems"), ///< Research Information Systems (RIS) bibliographies QStringLiteral("application/x-isi-export-format") ///< Information Sciences Institute (ISI) bibliographies }; BibliographyService::BibliographyService(QWidget *parentWidget) : QObject(parentWidget), d(new BibliographyService::Private(parentWidget, this)) { /// nothing } BibliographyService::~BibliographyService() { delete d; } void BibliographyService::setKBibTeXasDefault() { /// Go through all supported mime types for (const QString &mimeType : BibliographyService::Private::textBasedMimeTypes) { d->setKBibTeXforMimeType(mimeType, true); } /// kbuildsycoca5 has to be run to update the mime type associations QProcess *kbuildsycoca5Process = new 
QProcess(d->parentWidget); connect(kbuildsycoca5Process, static_cast<void(QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), QCoreApplication::instance(), [this](const int exitCode, const QProcess::ExitStatus exitStatus) { if (exitCode != 0 || exitStatus != QProcess::NormalExit) KMessageBox::error(d->parentWidget, i18n("Failed to run 'kbuildsycoca5' to update mime type associations.\n\nThe system may not know how to use KBibTeX to open bibliography files."), i18n("Failed to run 'kbuildsycoca5'")); }); kbuildsycoca5Process->start(QStringLiteral("kbuildsycoca5"), QStringList()); } bool BibliographyService::isKBibTeXdefault() const { /// Go through all supported mime types for (const QString &mimeType : BibliographyService::Private::textBasedMimeTypes) { /// Test if KBibTeX is default handler for mime type if (!d->isKBibTeXdefaultForMimeType(mimeType)) return false; ///< Failing any test means KBibTeX is not default application/part } return true; ///< All tests passed, KBibTeX is default application/part } diff --git a/src/processing/findduplicates.cpp b/src/processing/findduplicates.cpp index 0b4f0e29..9a8f7616 100644 --- a/src/processing/findduplicates.cpp +++ b/src/processing/findduplicates.cpp @@ -1,547 +1,547 @@ /*************************************************************************** * Copyright (C) 2004-2018 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. 
* * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "findduplicates.h" #include <typeinfo> #include <QLinkedList> #include <QProgressDialog> #include <QApplication> #include <QDate> #include <QRegularExpression> #include <KLocalizedString> #include <File> #include <models/FileModel> #include <Entry> EntryClique::EntryClique() { /// nothing } int EntryClique::entryCount() const { return checkedEntries.count(); } QList<QSharedPointer<Entry> > EntryClique::entryList() const { return checkedEntries.keys(); } bool EntryClique::isEntryChecked(QSharedPointer<Entry> entry) const { return checkedEntries[entry]; } void EntryClique::setEntryChecked(QSharedPointer<Entry> entry, bool isChecked) { checkedEntries[entry] = isChecked; recalculateValueMap(); } int EntryClique::fieldCount() const { return valueMap.count(); } QList<QString> EntryClique::fieldList() const { return valueMap.keys(); } QVector<Value> EntryClique::values(const QString &field) const { return valueMap[field]; } QVector<Value> &EntryClique::values(const QString &field) { return valueMap[field]; } Value EntryClique::chosenValue(const QString &field) const { Q_ASSERT_X(chosenValueMap[field].count() == 1, "Value EntryClique::chosenValue(const QString &field) const", "Exactly one value expected in chosenValueMap"); return chosenValueMap[field].first(); } QVector<Value> EntryClique::chosenValues(const QString &field) const { return chosenValueMap[field]; } void EntryClique::setChosenValue(const QString &field, const Value &value, ValueOperation valueOperation) { switch (valueOperation) { case SetValue: { chosenValueMap[field].clear(); chosenValueMap[field] << value; break; } case AddValue: { QString text = PlainTextValue::text(value); for (const Value &value : const_cast<const QVector<Value> &>(chosenValueMap[field])) if 
(PlainTextValue::text(value) == text) return; chosenValueMap[field] << value; break; } case RemoveValue: { QString text = PlainTextValue::text(value); for (QVector<Value>::Iterator it = chosenValueMap[field].begin(); it != chosenValueMap[field].end(); ++it) if (PlainTextValue::text(*it) == text) { chosenValueMap[field].erase(it); return; } break; } } } void EntryClique::addEntry(QSharedPointer<Entry> entry) { checkedEntries.insert(entry, false); /// remember to call recalculateValueMap later } void EntryClique::recalculateValueMap() { valueMap.clear(); chosenValueMap.clear(); /// go through each and every entry ... const QList<QSharedPointer<Entry> > el = entryList(); for (const auto &entry : el) if (isEntryChecked(entry)) { /// cover entry type Value v; v.append(QSharedPointer<VerbatimText>(new VerbatimText(entry->type()))); insertKeyValueToValueMap(QStringLiteral("^type"), v, entry->type(), Qt::CaseInsensitive /** entry types shall be compared case insensitive */); /// cover entry id v.clear(); v.append(QSharedPointer<VerbatimText>(new VerbatimText(entry->id()))); insertKeyValueToValueMap(QStringLiteral("^id"), v, entry->id()); /// go through each and every field of this entry for (Entry::ConstIterator fieldIt = entry->constBegin(); fieldIt != entry->constEnd(); ++fieldIt) { /// store both field name and value for later reference const QString fieldName = fieldIt.key().toLower(); const Value fieldValue = fieldIt.value(); if (fieldName == Entry::ftKeywords || fieldName == Entry::ftUrl) { for (const auto &vi : fieldValue) { const QString text = PlainTextValue::text(*vi); Value v; v << vi; insertKeyValueToValueMap(fieldName, v, text); } } else { const QString fieldValueText = PlainTextValue::text(fieldValue); insertKeyValueToValueMap(fieldName, fieldValue, fieldValueText); } } } const auto fl = fieldList(); for (const QString &fieldName : fl) if (valueMap[fieldName].count() < 2) { valueMap.remove(fieldName); chosenValueMap.remove(fieldName); } } void 
EntryClique::insertKeyValueToValueMap(const QString &fieldName, const Value &fieldValue, const QString &fieldValueText, const Qt::CaseSensitivity) { if (fieldValueText.isEmpty()) return; if (valueMap.contains(fieldName)) { /// in the list of alternatives, search of a value identical /// to the current (as of fieldIt) value (to avoid duplicates) bool alreadyContained = false; QVector<Value> alternatives = valueMap[fieldName]; for (const Value &v : const_cast<const QVector<Value> &>(alternatives)) if (PlainTextValue::text(v) == fieldValueText) { alreadyContained = true; break; } if (!alreadyContained) { alternatives << fieldValue; valueMap[fieldName] = alternatives; } } else { QVector<Value> alternatives = valueMap[fieldName]; alternatives << fieldValue; valueMap.insert(fieldName, alternatives); QVector<Value> chosen; chosen << fieldValue; chosenValueMap.insert(fieldName, chosen); } } class FindDuplicates::FindDuplicatesPrivate { private: const unsigned int maxDistance; int **d; static const int dsize; public: int sensitivity; QWidget *widget; FindDuplicatesPrivate(int sens, QWidget *w) : maxDistance(10000), sensitivity(sens), widget(w == nullptr ? qApp->activeWindow() : w) { d = new int *[dsize]; for (int i = 0; i < dsize; ++i) d[i] = new int[dsize]; } ~FindDuplicatesPrivate() { for (int i = 0; i < dsize; ++i) delete[] d[i]; delete [] d; } /** * Determine the Levenshtein distance between two words. 
- * See also http://en.wikipedia.org/wiki/Levenshtein_distance + * See also https://en.wikipedia.org/wiki/Levenshtein_distance * @param s first word, all chars already in lower case * @param t second word, all chars already in lower case * @return distance between both words */ double levenshteinDistanceWord(const QString &s, const QString &t) { const int m = qMin(s.length(), dsize - 1), n = qMin(t.length(), dsize - 1); if (m < 1 && n < 1) return 0.0; if (m < 1 || n < 1) return 1.0; for (int i = 0; i <= m; ++i) d[i][0] = i; for (int i = 0; i <= n; ++i) d[0][i] = i; for (int i = 1; i <= m; ++i) for (int j = 1; j <= n; ++j) { d[i][j] = d[i - 1][j] + 1; int c = d[i][j - 1] + 1; if (c < d[i][j]) d[i][j] = c; c = d[i - 1][j - 1] + (s[i - 1] == t[j - 1] ? 0 : 1); if (c < d[i][j]) d[i][j] = c; } double result = d[m][n]; result = result / qMax(m, n); result *= result; return result; } /** * Determine the Levenshtein distance between two sentences (list of words). - * See also http://en.wikipedia.org/wiki/Levenshtein_distance + * See also https://en.wikipedia.org/wiki/Levenshtein_distance * @param s first sentence * @param t second sentence * @return distance between both sentences */ double levenshteinDistance(const QStringList &s, const QStringList &t) { const int m = s.size(), n = t.size(); if (m < 1 && n < 1) return 0.0; if (m < 1 || n < 1) return 1.0; double **d = new double*[m + 1]; for (int i = 0; i <= m; ++i) { d[i] = new double[n + 1]; d[i][0] = i; } for (int i = 0; i <= n; ++i) d[0][i] = i; for (int i = 1; i <= m; ++i) for (int j = 1; j <= n; ++j) { d[i][j] = d[i - 1][j] + 1; double c = d[i][j - 1] + 1; if (c < d[i][j]) d[i][j] = c; c = d[i - 1][j - 1] + levenshteinDistanceWord(s[i - 1], t[j - 1]); if (c < d[i][j]) d[i][j] = c; } double result = d[m][n]; for (int i = 0; i <= m; ++i) delete[] d[i]; delete [] d; result = result / qMax(m, n); return result; } /** * Determine the Levenshtein distance between two sentences, * where each sentence is in a string (not 
split into single words). - * See also http://en.wikipedia.org/wiki/Levenshtein_distance + * See also https://en.wikipedia.org/wiki/Levenshtein_distance * @param s first sentence * @param t second sentence * @return distance between both sentences */ double levenshteinDistance(const QString &s, const QString &t) { static const QRegularExpression nonWordRegExp(QStringLiteral("[^a-z']+"), QRegularExpression::CaseInsensitiveOption); if (s.isEmpty() || t.isEmpty()) return 1.0; return levenshteinDistance(s.toLower().split(nonWordRegExp, QString::SkipEmptyParts), t.toLower().split(nonWordRegExp, QString::SkipEmptyParts)); } /** * Distance between two BibTeX entries, scaled by maxDistance. */ int entryDistance(Entry *entryA, Entry *entryB) { /// "distance" to be used if no value for a field is given const double neutralDistance = 0.05; /** * Get both entries' titles. If both are empty, use a "neutral * distance" otherwise compute levenshtein distance (0.0 .. 1.0). */ const QString titleA = PlainTextValue::text(entryA->value(Entry::ftTitle)); const QString titleB = PlainTextValue::text(entryB->value(Entry::ftTitle)); double titleDistance = titleA.isEmpty() && titleB.isEmpty() ? neutralDistance : levenshteinDistance(titleA, titleB); /** * Get both entries' author names. If both are empty, use a * "neutral distance" otherwise compute levenshtein distance * (0.0 .. 1.0). */ const QString authorA = PlainTextValue::text(entryA->value(Entry::ftAuthor)); const QString authorB = PlainTextValue::text(entryB->value(Entry::ftAuthor)); double authorDistance = authorA.isEmpty() && authorB.isEmpty() ? neutralDistance : levenshteinDistance(authorA, authorB); /** * Get both entries' years. If both are empty, use a * "neutral distance" otherwise compute distance as follows: * take square of difference between both years, but impose * a maximum of 100. Divide value by 100.0 to get a distance * value of 0.0 .. 1.0. 
*/ const QString yearA = PlainTextValue::text(entryA->value(Entry::ftYear)); const QString yearB = PlainTextValue::text(entryB->value(Entry::ftYear)); bool yearAok = false, yearBok = false; int yearAint = yearA.toInt(&yearAok); int yearBint = yearB.toInt(&yearBok); double yearDistance = yearAok && yearBok ? qMin((yearBint - yearAint) * (yearBint - yearAint), 100) / 100.0 : neutralDistance; /** * Compute total distance by taking individual distances for * author, title, and year. Weight each individual distance as * follows: title => 60%, author => 30%, year => 10% * Scale distance by maximum distance and round to int; result * will be in range 0 .. maxDistance. */ int distance = static_cast<int>(maxDistance * (titleDistance * 0.6 + authorDistance * 0.3 + yearDistance * 0.1) + 0.5); return distance; } }; const int FindDuplicates::FindDuplicatesPrivate::dsize = 32; FindDuplicates::FindDuplicates(QWidget *parent, int sensitivity) : QObject(parent), d(new FindDuplicatesPrivate(sensitivity, parent)) { /// nothing } FindDuplicates::~FindDuplicates() { delete d; } bool FindDuplicates::findDuplicateEntries(File *file, QVector<EntryClique *> &entryCliqueList) { QApplication::setOverrideCursor(Qt::WaitCursor); QScopedPointer<QProgressDialog> progressDlg(new QProgressDialog(i18n("Searching ..."), i18n("Cancel"), 0, 100000 /* to be set later to actual value */, d->widget)); progressDlg->setModal(true); progressDlg->setWindowTitle(i18n("Finding Duplicates")); progressDlg->setMinimumWidth(d->widget->fontMetrics().averageCharWidth() * 48); progressDlg->setAutoReset(false); entryCliqueList.clear(); /// assemble list of entries only (ignoring comments, macros, ...) 
QVector<QSharedPointer<Entry> > listOfEntries; listOfEntries.reserve(file->size()); for (const auto &element : const_cast<const File &>(*file)) { QSharedPointer<Entry> e = element.dynamicCast<Entry>(); if (!e.isNull() && !e->isEmpty()) listOfEntries << e; } if (listOfEntries.isEmpty()) { /// no entries to compare found entryCliqueList.clear(); QApplication::restoreOverrideCursor(); return progressDlg->wasCanceled(); } int curProgress = 0, maxProgress = listOfEntries.count() * (listOfEntries.count() - 1) / 2; int progressDelta = 1; progressDlg->setMaximum(maxProgress); progressDlg->show(); emit maximumProgress(maxProgress); /// go through all entries ... for (const auto &entry : const_cast<const QVector<QSharedPointer<Entry> > &>(listOfEntries)) { QApplication::instance()->processEvents(); if (progressDlg->wasCanceled()) { entryCliqueList.clear(); break; } progressDlg->setValue(curProgress); emit currentProgress(curProgress); /// ... and find a "clique" of entries where it will match, i.e. 
distance is below sensitivity /// assume current entry will match in no clique bool foundClique = false; /// go through all existing cliques for (QVector<EntryClique *>::Iterator cit = entryCliqueList.begin(); cit != entryCliqueList.end(); ++cit) { /// check distance between current entry and clique's first entry if (d->entryDistance(entry.data(), (*cit)->entryList().constFirst().data()) < d->sensitivity) { /// if distance is below sensitivity, add current entry to clique foundClique = true; (*cit)->addEntry(entry); break; } QApplication::instance()->processEvents(); if (progressDlg->wasCanceled()) { entryCliqueList.clear(); break; } } if (!progressDlg->wasCanceled() && !foundClique) { /// no clique matched to current entry, so create and add new clique /// consisting only of the current entry EntryClique *newClique = new EntryClique(); newClique->addEntry(entry); entryCliqueList << newClique; } curProgress += progressDelta; ++progressDelta; progressDlg->setValue(curProgress); emit currentProgress(curProgress); } progressDlg->setValue(progressDlg->maximum()); /// remove cliques with only one element (nothing to merge here) from the list of cliques for (QVector<EntryClique *>::Iterator cit = entryCliqueList.begin(); cit != entryCliqueList.end();) if ((*cit)->entryCount() < 2) { EntryClique *ec = *cit; cit = entryCliqueList.erase(cit); delete ec; } else { /// entries have been inserted as checked, /// therefore recalculate alternatives (*cit)->recalculateValueMap(); ++cit; } QApplication::restoreOverrideCursor(); return progressDlg->wasCanceled(); } MergeDuplicates::MergeDuplicates() { /// nothing } bool MergeDuplicates::mergeDuplicateEntries(const QVector<EntryClique *> &entryCliques, FileModel *fileModel) { bool didMerge = false; for (EntryClique *entryClique : entryCliques) { /// Avoid adding fields 20 lines below /// which have been remove (not added) 10 lines below QSet<QString> coveredFields; Entry *mergedEntry = new Entry(QString(), QString()); const auto 
fieldList = entryClique->fieldList(); coveredFields.reserve(fieldList.size()); for (const auto &field : fieldList) { coveredFields << field; if (field == QStringLiteral("^id")) mergedEntry->setId(PlainTextValue::text(entryClique->chosenValue(field))); else if (field == QStringLiteral("^type")) mergedEntry->setType(PlainTextValue::text(entryClique->chosenValue(field))); else { Value combined; const auto chosenValues = entryClique->chosenValues(field); for (const Value &v : chosenValues) { combined.append(v); } if (!combined.isEmpty()) mergedEntry->insert(field, combined); } } bool actuallyMerged = false; int preferredInsertionRow = -1; const auto entryList = entryClique->entryList(); for (const auto &entry : entryList) { /// if merging entries with identical ids, the merged entry will not yet have an id (is null) if (mergedEntry->id().isEmpty()) mergedEntry->setId(entry->id()); /// if merging entries with identical types, the merged entry will not yet have an type (is null) if (mergedEntry->type().isEmpty()) mergedEntry->setType(entry->type()); /// add all other fields not covered by user selection /// those fields did only occur in one entry (no conflict) /// may add a lot of bloat to merged entry if (entryClique->isEntryChecked(entry)) { actuallyMerged = true; for (Entry::ConstIterator it = entry->constBegin(); it != entry->constEnd(); ++it) if (!mergedEntry->contains(it.key()) && !coveredFields.contains(it.key())) { mergedEntry->insert(it.key(), it.value()); coveredFields << it.key(); } const int row = fileModel->row(entry); if (preferredInsertionRow < 0) preferredInsertionRow = row; fileModel->removeRow(row); } } if (actuallyMerged) { if (preferredInsertionRow < 0) preferredInsertionRow = fileModel->rowCount(); fileModel->insertRow(QSharedPointer<Entry>(mergedEntry), preferredInsertionRow); } else delete mergedEntry; didMerge |= actuallyMerged; } return didMerge; } diff --git a/src/program/mainwindow.cpp b/src/program/mainwindow.cpp index 79223765..568e05ad 
100644 --- a/src/program/mainwindow.cpp +++ b/src/program/mainwindow.cpp @@ -1,481 +1,480 @@ /*************************************************************************** * Copyright (C) 2004-2019 by Thomas Fischer <fischer@unix-ag.uni-kl.de> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see <https://www.gnu.org/licenses/>. * ***************************************************************************/ #include "mainwindow.h" #include <QDockWidget> #include <QDragEnterEvent> #include <QDropEvent> #include <QLabel> #include <QMimeData> #include <QPointer> #include <QMenu> #include <QTimer> #include <QApplication> #include <QFileDialog> #include <QAction> #include <KActionMenu> #include <KActionCollection> #include <KPluginFactory> #include <KPluginLoader> #include <KLocalizedString> #include <KMessageBox> #include <KBibTeX> #include <preferences/KBibTeXPreferencesDialog> #include <file/FileView> #include <XSLTransform> #include <BibliographyService> #include <BibUtils> #include "docklets/referencepreview.h" #include "docklets/documentpreview.h" #include "docklets/searchform.h" #include "docklets/searchresults.h" #include "docklets/elementform.h" -#include "docklets/documentpreview.h" #include "docklets/statistics.h" #include "docklets/filesettings.h" #include "docklets/valuelist.h" #include "docklets/zoterobrowser.h" #include "documentlist.h" #include "mdiwidget.h" class 
KBibTeXMainWindow::KBibTeXMainWindowPrivate {
private:
    /// The main window this private object belongs to
    KBibTeXMainWindow *p;

public:
    /// 'Close' action; created disabled in the constructor below
    QAction *actionClose;
    /// One dock widget per docklet (panel)
    QDockWidget *dockDocumentList;
    QDockWidget *dockReferencePreview;
    QDockWidget *dockDocumentPreview;
    QDockWidget *dockValueList;
    QDockWidget *dockZotero;
    QDockWidget *dockStatistics;
    QDockWidget *dockSearchForm;
    QDockWidget *dockSearchResults;
    QDockWidget *dockElementForm;
    QDockWidget *dockFileSettings;
    /// The widgets shown inside the dock widgets, plus the central MDI widget
    DocumentList *listDocumentList;
    MDIWidget *mdiWidget;
    ReferencePreview *referencePreview;
    DocumentPreview *documentPreview;
    FileSettings *fileSettings;
    ValueList *valueList;
    ZoteroBrowser *zotero;
    Statistics *statistics;
    SearchForm *searchForm;
    SearchResults *searchResults;
    ElementForm *elementForm;
    /// Menu attached to the 'file_open_recent' action
    QMenu *actionMenuRecentFilesMenu;

    /**
     * Create the MDI widget, the 'Show Panels' and 'Recently used files'
     * menus, all docklets, and the standard actions of the main window.
     * Statement order matters here: docks are added and tabified in
     * the sequence in which they appear below.
     */
    KBibTeXMainWindowPrivate(KBibTeXMainWindow *parent)
            : p(parent) {
        mdiWidget = new MDIWidget(p);

        /// Menu holding one toggle action per docklet
        KActionMenu *showPanelsAction = new KActionMenu(i18n("Show Panels"), p);
        p->actionCollection()->addAction(QStringLiteral("settings_shown_panels"), showPanelsAction);
        QMenu *showPanelsMenu = new QMenu(showPanelsAction->text(), p->widget());
        showPanelsAction->setMenu(showPanelsMenu);

        /// Menu for recently used files; the menu itself is created empty here
        KActionMenu *actionMenuRecentFiles = new KActionMenu(QIcon::fromTheme(QStringLiteral("document-open-recent")), i18n("Recently used files"), p);
        p->actionCollection()->addAction(QStringLiteral("file_open_recent"), actionMenuRecentFiles);
        actionMenuRecentFilesMenu = new QMenu(actionMenuRecentFiles->text(), p->widget());
        actionMenuRecentFiles->setMenu(actionMenuRecentFilesMenu);

        /**
         * Docklets (a.k.a. panels) will be added by default to the following
         * positions unless otherwise configured by the user.
         * - "List of Values" on the left
         * - "Statistics" on the left
         * - "List of Documents" on the left in the same tab
         * - "Online Search" on the left in a new tab
         * - "Reference Preview" on the left in the same tab
         * - "Search Results" on the bottom
         * - "Document Preview" is hidden
         * - "Element Editor" is hidden
         */

        /// "List of Documents": may only be docked left or right
        dockDocumentList = new QDockWidget(i18n("List of Documents"), p);
        dockDocumentList->setAllowedAreas(Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockDocumentList);
        listDocumentList = new DocumentList(dockDocumentList);
        dockDocumentList->setWidget(listDocumentList);
        dockDocumentList->setObjectName(QStringLiteral("dockDocumentList"));
        dockDocumentList->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        connect(listDocumentList, &DocumentList::openFile, p, &KBibTeXMainWindow::openDocument);
        showPanelsMenu->addAction(dockDocumentList->toggleViewAction());

        /// "List of Values"
        dockValueList = new QDockWidget(i18n("List of Values"), p);
        dockValueList->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockValueList);
        valueList = new ValueList(dockValueList);
        dockValueList->setWidget(valueList);
        dockValueList->setObjectName(QStringLiteral("dockValueList"));
        dockValueList->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockValueList->toggleViewAction());

        /// "Statistics"
        dockStatistics = new QDockWidget(i18n("Statistics"), p);
        dockStatistics->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockStatistics);
        statistics = new Statistics(dockStatistics);
        dockStatistics->setWidget(statistics);
        dockStatistics->setObjectName(QStringLiteral("dockStatistics"));
        dockStatistics->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockStatistics->toggleViewAction());

        /// "Search Results": docked at the bottom, initially hidden.
        /// Created before the search form and the Zotero browser as
        /// both receive the results widget in their constructors.
        dockSearchResults = new QDockWidget(i18n("Search Results"), p);
        dockSearchResults->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::BottomDockWidgetArea, dockSearchResults);
        dockSearchResults->hide();
        searchResults = new SearchResults(mdiWidget, dockSearchResults);
        dockSearchResults->setWidget(searchResults);
        dockSearchResults->setObjectName(QStringLiteral("dockResultsFrom"));
        dockSearchResults->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockSearchResults->toggleViewAction());
        connect(mdiWidget, &MDIWidget::documentSwitched, searchResults, &SearchResults::documentSwitched);

        /// "Online Search"
        dockSearchForm = new QDockWidget(i18n("Online Search"), p);
        dockSearchForm->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockSearchForm);
        searchForm = new SearchForm(searchResults, dockSearchForm);
        connect(searchForm, &SearchForm::doneSearching, p, &KBibTeXMainWindow::showSearchResults);
        dockSearchForm->setWidget(searchForm);
        dockSearchForm->setObjectName(QStringLiteral("dockSearchFrom"));
        dockSearchForm->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockSearchForm->toggleViewAction());

        /// "Zotero": the browser gets notified when its dock's visibility changes
        dockZotero = new QDockWidget(i18n("Zotero"), p);
        dockZotero->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockZotero);
        zotero = new ZoteroBrowser(searchResults, dockZotero);
        connect(dockZotero, &QDockWidget::visibilityChanged, zotero, &ZoteroBrowser::visibiltyChanged);
        connect(zotero, &ZoteroBrowser::itemToShow, p, &KBibTeXMainWindow::showSearchResults);
        dockZotero->setWidget(zotero);
        dockZotero->setObjectName(QStringLiteral("dockZotero"));
        dockZotero->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockZotero->toggleViewAction());

        /// "Reference Preview"
        dockReferencePreview = new QDockWidget(i18n("Reference Preview"), p);
        dockReferencePreview->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockReferencePreview);
        referencePreview = new ReferencePreview(dockReferencePreview);
        dockReferencePreview->setWidget(referencePreview);
        dockReferencePreview->setObjectName(QStringLiteral("dockReferencePreview"));
        dockReferencePreview->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockReferencePreview->toggleViewAction());

        /// "Document Preview": docked right, initially hidden, toggled with Ctrl+Shift+D
        dockDocumentPreview = new QDockWidget(i18n("Document Preview"), p);
        dockDocumentPreview->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::RightDockWidgetArea, dockDocumentPreview);
        dockDocumentPreview->hide();
        documentPreview = new DocumentPreview(dockDocumentPreview);
        dockDocumentPreview->setWidget(documentPreview);
        dockDocumentPreview->setObjectName(QStringLiteral("dockDocumentPreview"));
        dockDocumentPreview->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockDocumentPreview->toggleViewAction());
        p->actionCollection()->setDefaultShortcut(dockDocumentPreview->toggleViewAction(), Qt::CTRL + Qt::SHIFT + Qt::Key_D);

        /// "Element Editor": docked at the bottom, initially hidden
        dockElementForm = new QDockWidget(i18n("Element Editor"), p);
        dockElementForm->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::BottomDockWidgetArea, dockElementForm);
        dockElementForm->hide();
        elementForm = new ElementForm(mdiWidget, dockElementForm);
        dockElementForm->setWidget(elementForm);
        dockElementForm->setObjectName(QStringLiteral("dockElementFrom"));
        dockElementForm->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockElementForm->toggleViewAction());

        /// "File Settings"
        dockFileSettings = new QDockWidget(i18n("File Settings"), p);
        dockFileSettings->setAllowedAreas(Qt::BottomDockWidgetArea | Qt::TopDockWidgetArea | Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea);
        p->addDockWidget(Qt::LeftDockWidgetArea, dockFileSettings);
        fileSettings = new FileSettings(dockFileSettings);
        dockFileSettings->setWidget(fileSettings);
        dockFileSettings->setObjectName(QStringLiteral("dockFileSettings"));
        dockFileSettings->setFeatures(QDockWidget::DockWidgetClosable | QDockWidget::DockWidgetMovable | QDockWidget::DockWidgetFloatable);
        showPanelsMenu->addAction(dockFileSettings->toggleViewAction());

        /// Stack the docks on the left side into tabs
        p->tabifyDockWidget(dockFileSettings, dockSearchForm);
        p->tabifyDockWidget(dockZotero, dockSearchForm);
        p->tabifyDockWidget(dockValueList, dockStatistics);
        p->tabifyDockWidget(dockStatistics, dockFileSettings);
        p->tabifyDockWidget(dockSearchForm, dockReferencePreview);
        p->tabifyDockWidget(dockFileSettings, dockDocumentList);

        /// Standard actions, each connected to the matching main window slot
        QAction *action = p->actionCollection()->addAction(KStandardAction::New);
        connect(action, &QAction::triggered, p, &KBibTeXMainWindow::newDocument);
        action = p->actionCollection()->addAction(KStandardAction::Open);
        connect(action, &QAction::triggered, p, &KBibTeXMainWindow::openDocumentDialog);
        actionClose = p->actionCollection()->addAction(KStandardAction::Close);
        connect(actionClose, &QAction::triggered, p, &KBibTeXMainWindow::closeDocument);
        actionClose->setEnabled(false);
        action = p->actionCollection()->addAction(KStandardAction::Quit);
        connect(action, &QAction::triggered, p, &KBibTeXMainWindow::queryCloseAll);
        action = p->actionCollection()->addAction(KStandardAction::Preferences);
        connect(action, &QAction::triggered, p, &KBibTeXMainWindow::showPreferences);
    }

    /// Schedules the element form for deletion and deletes the MDI widget
    ~KBibTeXMainWindowPrivate() {
        elementForm->deleteLater();
        delete mdiWidget;
        // TODO other deletes
    }
};

/**
 * Set up the main window: create the private object (and with it all
 * docklets and actions), wire the MDI widget and the open-file manager
 * to this window, and build the GUI from "kbibtexui.rc".
 */
KBibTeXMainWindow::KBibTeXMainWindow(QWidget *parent)
        : KParts::MainWindow(parent, static_cast<Qt::WindowFlags>(KDE_DEFAULT_WINDOWFLAGS)), d(new KBibTeXMainWindowPrivate(this))
{
    setObjectName(QStringLiteral("KBibTeXShell"));

    setXMLFile(QStringLiteral("kbibtexui.rc"));

    setCentralWidget(d->mdiWidget);

    connect(d->mdiWidget, &MDIWidget::documentSwitched, this, &KBibTeXMainWindow::documentSwitched);
    connect(d->mdiWidget, &MDIWidget::activePartChanged, this, &KBibTeXMainWindow::createGUI); ///< actually: KParts::MainWindow::createGUI
    connect(d->mdiWidget, &MDIWidget::documentNew, this, &KBibTeXMainWindow::newDocument);
    connect(d->mdiWidget, &MDIWidget::documentOpen, this, &KBibTeXMainWindow::openDocumentDialog);
    connect(d->mdiWidget, &MDIWidget::documentOpenURL, this, &KBibTeXMainWindow::openDocument);
    connect(&OpenFileInfoManager::instance(), &OpenFileInfoManager::currentChanged, d->mdiWidget, &MDIWidget::setFile);
    connect(&OpenFileInfoManager::instance(), &OpenFileInfoManager::flagsChanged, this, &KBibTeXMainWindow::documentListsChanged);
    connect(d->mdiWidget, &MDIWidget::setCaption, this, static_cast<void(KMainWindow::*)(const QString &)>(&KMainWindow::setCaption)); ///< actually: KMainWindow::setCaption

    documentListsChanged(OpenFileInfo::RecentlyUsed); /// force initialization of menu of recently used files

    setupControllers();
    setupGUI(KXmlGuiWindow::Create | KXmlGuiWindow::Save | KXmlGuiWindow::Keys | KXmlGuiWindow::ToolBar);

    /// The left and right dock areas get the window corners
    setCorner(Qt::TopLeftCorner, Qt::LeftDockWidgetArea);
    setCorner(Qt::TopRightCorner, Qt::RightDockWidgetArea);
setCorner(Qt::BottomLeftCorner, Qt::LeftDockWidgetArea); setCorner(Qt::BottomRightCorner, Qt::RightDockWidgetArea); setAcceptDrops(true); QTimer::singleShot(500, this, &KBibTeXMainWindow::delayed); } KBibTeXMainWindow::~KBibTeXMainWindow() { delete d; } void KBibTeXMainWindow::setupControllers() { // TODO } void KBibTeXMainWindow::dragEnterEvent(QDragEnterEvent *event) { if (event->mimeData()->hasUrls()) event->acceptProposedAction(); } void KBibTeXMainWindow::dropEvent(QDropEvent *event) { QList<QUrl> urlList = event->mimeData()->urls(); if (urlList.isEmpty()) { const QUrl url(event->mimeData()->text()); if (url.isValid()) urlList << url; } if (!urlList.isEmpty()) for (const QUrl &url : const_cast<const QList<QUrl> &>(urlList)) openDocument(url); } void KBibTeXMainWindow::newDocument() { const QString mimeType = FileInfo::mimetypeBibTeX; OpenFileInfo *openFileInfo = OpenFileInfoManager::instance().createNew(mimeType); if (openFileInfo) OpenFileInfoManager::instance().setCurrentFile(openFileInfo); else KMessageBox::error(this, i18n("Creating a new document of mime type '%1' failed as no editor component could be instantiated.", mimeType), i18n("Creating document failed")); } void KBibTeXMainWindow::openDocumentDialog() { OpenFileInfo *currFile = OpenFileInfoManager::instance().currentFile(); QUrl currFileUrl = currFile == nullptr ? QUrl() : currFile->url(); QString startDir = currFileUrl.isValid() ? 
QUrl(currFileUrl.url()).path() : QString(); OpenFileInfo *ofi = OpenFileInfoManager::instance().currentFile(); if (ofi != nullptr) { QUrl url = ofi->url(); if (url.isValid()) startDir = url.path(); } /// Assemble list of supported mimetypes QStringList supportedMimeTypes {QStringLiteral("text/x-bibtex"), QStringLiteral("application/x-research-info-systems"), QStringLiteral("application/xml")}; if (BibUtils::available()) { supportedMimeTypes.append(QStringLiteral("application/x-isi-export-format")); supportedMimeTypes.append(QStringLiteral("application/x-endnote-refer")); } supportedMimeTypes.append(QStringLiteral("application/pdf")); supportedMimeTypes.append(QStringLiteral("all/all")); QPointer<QFileDialog> dlg = new QFileDialog(this, i18n("Open file") /* TODO better text */, startDir); dlg->setMimeTypeFilters(supportedMimeTypes); dlg->setFileMode(QFileDialog::ExistingFile); const bool dialogAccepted = dlg->exec() != 0; const QUrl url = (dialogAccepted && !dlg->selectedUrls().isEmpty()) ? dlg->selectedUrls().first() : QUrl(); delete dlg; if (url.isValid()) openDocument(url); } void KBibTeXMainWindow::openDocument(const QUrl &url) { OpenFileInfo *openFileInfo = OpenFileInfoManager::instance().open(url); OpenFileInfoManager::instance().setCurrentFile(openFileInfo); } void KBibTeXMainWindow::closeDocument() { OpenFileInfoManager::instance().close(OpenFileInfoManager::instance().currentFile()); } void KBibTeXMainWindow::closeEvent(QCloseEvent *event) { KMainWindow::closeEvent(event); if (OpenFileInfoManager::instance().queryCloseAll()) event->accept(); else event->ignore(); } void KBibTeXMainWindow::showPreferences() { QPointer<KBibTeXPreferencesDialog> dlg = new KBibTeXPreferencesDialog(this); dlg->exec(); delete dlg; } void KBibTeXMainWindow::documentSwitched(FileView *oldFileView, FileView *newFileView) { OpenFileInfo *openFileInfo = d->mdiWidget->currentFile(); bool validFile = openFileInfo != nullptr; d->actionClose->setEnabled(validFile); setCaption(validFile ? 
i18n("%1 - KBibTeX", openFileInfo->shortCaption()) : i18n("KBibTeX")); d->fileSettings->setEnabled(newFileView != nullptr); d->referencePreview->setEnabled(newFileView != nullptr); d->elementForm->setEnabled(newFileView != nullptr); d->documentPreview->setEnabled(newFileView != nullptr); if (oldFileView != nullptr) { disconnect(newFileView, &FileView::currentElementChanged, d->referencePreview, &ReferencePreview::setElement); disconnect(newFileView, &FileView::currentElementChanged, d->elementForm, &ElementForm::setElement); disconnect(newFileView, &FileView::currentElementChanged, d->documentPreview, &DocumentPreview::setElement); disconnect(newFileView, &FileView::currentElementChanged, d->searchForm, &SearchForm::setElement); disconnect(newFileView, &FileView::modified, d->valueList, &ValueList::update); disconnect(newFileView, &FileView::modified, d->statistics, &Statistics::update); // FIXME disconnect(oldEditor, SIGNAL(modified()), d->elementForm, SLOT(refreshElement())); disconnect(d->elementForm, &ElementForm::elementModified, newFileView, &FileView::externalModification); } if (newFileView != nullptr) { connect(newFileView, &FileView::currentElementChanged, d->referencePreview, &ReferencePreview::setElement); connect(newFileView, &FileView::currentElementChanged, d->elementForm, &ElementForm::setElement); connect(newFileView, &FileView::currentElementChanged, d->documentPreview, &DocumentPreview::setElement); connect(newFileView, &FileView::currentElementChanged, d->searchForm, &SearchForm::setElement); connect(newFileView, &FileView::modified, d->valueList, &ValueList::update); connect(newFileView, &FileView::modified, d->statistics, &Statistics::update); // FIXME connect(newEditor, SIGNAL(modified()), d->elementForm, SLOT(refreshElement())); connect(d->elementForm, &ElementForm::elementModified, newFileView, &FileView::externalModification); connect(d->elementForm, &ElementForm::elementModified, newFileView, &FileView::externalModification); } 
d->documentPreview->setBibTeXUrl(validFile ? openFileInfo->url() : QUrl()); d->referencePreview->setElement(QSharedPointer<Element>(), nullptr); d->elementForm->setElement(QSharedPointer<Element>(), nullptr); d->documentPreview->setElement(QSharedPointer<Element>(), nullptr); d->valueList->setFileView(newFileView); d->fileSettings->setFileView(newFileView); d->statistics->setFileView(newFileView); d->referencePreview->setFileView(newFileView); } void KBibTeXMainWindow::showSearchResults() { d->dockSearchResults->show(); } void KBibTeXMainWindow::documentListsChanged(OpenFileInfo::StatusFlags statusFlags) { if (statusFlags.testFlag(OpenFileInfo::RecentlyUsed)) { const OpenFileInfoManager::OpenFileInfoList list = OpenFileInfoManager::instance().filteredItems(OpenFileInfo::RecentlyUsed); d->actionMenuRecentFilesMenu->clear(); for (OpenFileInfo *cur : list) { /// Fixing bug 19511: too long filenames make menu too large, /// therefore squeeze text if it is longer than squeezeLen. const int squeezeLen = 64; const QString squeezedShortCap = squeeze_text(cur->shortCaption(), squeezeLen); const QString squeezedFullCap = squeeze_text(cur->fullCaption(), squeezeLen); QAction *action = new QAction(QString(QStringLiteral("%1 [%2]")).arg(squeezedShortCap, squeezedFullCap), this); action->setData(cur->url()); action->setIcon(QIcon::fromTheme(cur->mimeType().replace(QLatin1Char('/'), QLatin1Char('-')))); d->actionMenuRecentFilesMenu->addAction(action); connect(action, &QAction::triggered, this, &KBibTeXMainWindow::openRecentFile); } } } void KBibTeXMainWindow::openRecentFile() { QAction *action = static_cast<QAction *>(sender()); QUrl url = action->data().toUrl(); openDocument(url); } void KBibTeXMainWindow::queryCloseAll() { if (OpenFileInfoManager::instance().queryCloseAll()) qApp->quit(); } void KBibTeXMainWindow::delayed() { /// Static variable, memorizes the dynamically created /// BibliographyService instance and allows to tell if /// this slot was called for the first or 
second time. static BibliographyService *bs = nullptr; if (bs == nullptr) { /// First call to this slot bs = new BibliographyService(this); if (!bs->isKBibTeXdefault() && KMessageBox::questionYesNo(this, i18n("KBibTeX is not the default editor for its bibliography formats like BibTeX or RIS."), i18n("Default Bibliography Editor"), KGuiItem(i18n("Set as Default Editor")), KGuiItem(i18n("Keep settings unchanged"))) == KMessageBox::Yes) { bs->setKBibTeXasDefault(); /// QTimer calls this slot again, but as 'bs' will not be NULL, /// the 'if' construct's 'else' path will be followed. QTimer::singleShot(5000, this, &KBibTeXMainWindow::delayed); } else { /// KBibTeX is default application or user doesn't care, /// therefore clean up memory delete bs; bs = nullptr; } } else { /// Second call to this slot. This time, clean up memory. bs->deleteLater(); bs = nullptr; } }