diff --git a/src/io/encoderlatex.cpp b/src/io/encoderlatex.cpp index 271d4045..c3185ec9 100644 --- a/src/io/encoderlatex.cpp +++ b/src/io/encoderlatex.cpp @@ -1,1463 +1,1493 @@ /*************************************************************************** - * Copyright (C) 2004-2019 by Thomas Fischer * + * Copyright (C) 2004-2020 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . * ***************************************************************************/ #include "encoderlatex.h" #include #include "logging_io.h" inline bool isAsciiLetter(const QChar c) { return (c.unicode() >= static_cast('A') && c.unicode() <= static_cast('Z')) || (c.unicode() >= static_cast('a') && c.unicode() <= static_cast('z')); } inline int asciiLetterOrDigitToPos(const QChar c) { static const ushort upperCaseLetterA = QLatin1Char('A').unicode(); static const ushort upperCaseLetterZ = QLatin1Char('Z').unicode(); static const ushort lowerCaseLetterA = QLatin1Char('a').unicode(); static const ushort lowerCaseLetterZ = QLatin1Char('z').unicode(); static const ushort digit0 = QLatin1Char('0').unicode(); static const ushort digit9 = QLatin1Char('9').unicode(); const ushort unicode = c.unicode(); if (unicode >= upperCaseLetterA && unicode <= upperCaseLetterZ) return unicode - upperCaseLetterA; else if (unicode >= lowerCaseLetterA && unicode <= lowerCaseLetterZ) return unicode + 26 - lowerCaseLetterA; else if 
(unicode >= digit0 && unicode <= digit9) return unicode + 52 - digit0; else return -1; } inline bool isIJ(const QChar c) { static const QChar upperCaseLetterI = QLatin1Char('I'); static const QChar upperCaseLetterJ = QLatin1Char('J'); static const QChar lowerCaseLetterI = QLatin1Char('i'); static const QChar lowerCaseLetterJ = QLatin1Char('j'); return c == upperCaseLetterI || c == upperCaseLetterJ || c == lowerCaseLetterI || c == lowerCaseLetterJ; } enum EncoderLaTeXCommandDirection { DirectionCommandToUnicode = 1, DirectionUnicodeToCommand = 2, DirectionBoth = DirectionCommandToUnicode | DirectionUnicodeToCommand }; /** * General documentation on this topic: * https://www.latex-project.org/help/documentation/encguide.pdf * https://mirror.hmc.edu/ctan/macros/xetex/latex/xecjk/xunicode-symbols.pdf * ftp://ftp.dante.de/tex-archive/biblio/biber/documentation/utf8-macro-map.html */ /** * This structure contains information how escaped characters * such as \"a are translated to an Unicode character and back. * The structure is a table with three columns: (1) the modifier * (in the example before the quotation mark) (2) the ASCII * character ((in the example before the 'a') (3) the Unicode * character described by a hexcode. * This data structure is used both directly and indirectly via * the LookupTable structure which is initialized when the * EncoderLaTeX object is created. 
*/
static const struct EncoderLaTeXEscapedCharacter {
    const QChar modifier; ///< accent-like modifier following the backslash, e.g. '"' in \"a
    const QChar letter; ///< ASCII character the modifier is applied to, e.g. 'a' in \"a
    const ushort unicode; ///< code point of the resulting character
    const EncoderLaTeXCommandDirection direction; ///< direction(s) this row is flagged for
} encoderLaTeXEscapedCharacters[] = {
    {QLatin1Char('`'), QLatin1Char('A'), 0x00C0, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('A'), 0x00C1, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('A'), 0x00C2, DirectionBoth},
    {QLatin1Char('~'), QLatin1Char('A'), 0x00C3, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('A'), 0x00C4, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('A'), 0x00C5, DirectionBoth},
    /** 0x00C6: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('c'), QLatin1Char('C'), 0x00C7, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('E'), 0x00C8, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('E'), 0x00C9, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('E'), 0x00CA, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('E'), 0x00CB, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('I'), 0x00CC, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('I'), 0x00CD, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('I'), 0x00CE, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('I'), 0x00CF, DirectionBoth},
    /** 0x00D0: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('~'), QLatin1Char('N'), 0x00D1, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('O'), 0x00D2, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('O'), 0x00D3, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('O'), 0x00D4, DirectionBoth},
    {QLatin1Char('~'), QLatin1Char('O'), 0x00D5, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('O'), 0x00D6, DirectionBoth},
    /** 0x00D7: see EncoderLaTeXCharacterCommand */
    /** 0x00D8: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('`'), QLatin1Char('U'), 0x00D9, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('U'), 0x00DA, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('U'), 0x00DB, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('U'), 0x00DC, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('Y'), 0x00DD, DirectionBoth},
    /** 0x00DE: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('"'), QLatin1Char('s'), 0x00DF, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('a'), 0x00E0, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('a'), 0x00E1, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('a'), 0x00E2, DirectionBoth},
    {QLatin1Char('~'), QLatin1Char('a'), 0x00E3, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('a'), 0x00E4, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('a'), 0x00E5, DirectionBoth},
    /** 0x00E6: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('c'), QLatin1Char('c'), 0x00E7, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('e'), 0x00E8, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('e'), 0x00E9, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('e'), 0x00EA, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('e'), 0x00EB, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('i'), 0x00EC, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('i'), 0x00ED, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('i'), 0x00EE, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('i'), 0x00EF, DirectionBoth},
    /** 0x00F0: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('~'), QLatin1Char('n'), 0x00F1, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('o'), 0x00F2, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('o'), 0x00F3, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('o'), 0x00F4, DirectionBoth},
    {QLatin1Char('~'), QLatin1Char('o'), 0x00F5, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('o'), 0x00F6, DirectionBoth},
    /** 0x00F7: see EncoderLaTeXCharacterCommand */
    /** 0x00F8: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('`'), QLatin1Char('u'), 0x00F9, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('u'), 0x00FA, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('u'), 0x00FB, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('u'), 0x00FC, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('y'), 0x00FD, DirectionBoth},
    /** 0x00FE: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('"'), QLatin1Char('y'), 0x00FF, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('A'), 0x0100, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('a'), 0x0101, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('A'), 0x0102, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('a'), 0x0103, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('A'), 0x0104, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('a'), 0x0105, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('C'), 0x0106, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('c'), 0x0107, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('C'), 0x0108, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('c'), 0x0109, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('C'), 0x010A, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('c'), 0x010B, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('C'), 0x010C, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('c'), 0x010D, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('D'), 0x010E, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('d'), 0x010F, DirectionBoth},
    {QLatin1Char('B'), QLatin1Char('D'), 0x0110, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('d'), 0x0111, DirectionCommandToUnicode},
    {QLatin1Char('='), QLatin1Char('E'), 0x0112, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('e'), 0x0113, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('E'), 0x0114, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('e'), 0x0115, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('E'), 0x0116, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('e'), 0x0117, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('E'), 0x0118, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('e'), 0x0119, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('E'), 0x011A, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('e'), 0x011B, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('G'), 0x011C, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('g'), 0x011D, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('G'), 0x011E, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('g'), 0x011F, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('G'), 0x0120, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('g'), 0x0121, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('G'), 0x0122, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('g'), 0x0123, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('H'), 0x0124, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('h'), 0x0125, DirectionBoth},
    {QLatin1Char('B'), QLatin1Char('H'), 0x0126, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('h'), 0x0127, DirectionCommandToUnicode},
    {QLatin1Char('~'), QLatin1Char('I'), 0x0128, DirectionBoth},
    {QLatin1Char('~'), QLatin1Char('i'), 0x0129, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('I'), 0x012A, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('i'), 0x012B, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('I'), 0x012C, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('i'), 0x012D, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('I'), 0x012E, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('i'), 0x012F, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('I'), 0x0130, DirectionBoth},
    /** 0x0131: see EncoderLaTeXCharacterCommand */
    /** 0x0132: see EncoderLaTeXCharacterCommand */
    /** 0x0133: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('^'), QLatin1Char('J'), 0x0134, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('j'), 0x0135, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('K'), 0x0136, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('k'), 0x0137, DirectionBoth},
    /** 0x0138: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('\''), QLatin1Char('L'), 0x0139, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('l'), 0x013A, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('L'), 0x013B, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('l'), 0x013C, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('L'), 0x013D, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('l'), 0x013E, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('L'), 0x013F, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('l'), 0x0140, DirectionBoth},
    {QLatin1Char('B'), QLatin1Char('L'), 0x0141, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('l'), 0x0142, DirectionCommandToUnicode},
    {QLatin1Char('\''), QLatin1Char('N'), 0x0143, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('n'), 0x0144, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('N'), 0x0145, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('n'), 0x0146, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('N'), 0x0147, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('n'), 0x0148, DirectionBoth},
    /** 0x0149: TODO n preceded by apostrophe */
    {QLatin1Char('m'), QLatin1Char('N'), 0x014A, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('n'), 0x014B, DirectionCommandToUnicode},
    {QLatin1Char('='), QLatin1Char('O'), 0x014C, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('o'), 0x014D, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('O'), 0x014E, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('o'), 0x014F, DirectionBoth},
    {QLatin1Char('H'), QLatin1Char('O'), 0x0150, DirectionBoth},
    {QLatin1Char('H'), QLatin1Char('o'), 0x0151, DirectionBoth},
    /** 0x0152: see EncoderLaTeXCharacterCommand */
    /** 0x0153: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('\''), QLatin1Char('R'), 0x0154, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('r'), 0x0155, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('R'), 0x0156, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('r'), 0x0157, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('R'), 0x0158, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('r'), 0x0159, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('S'), 0x015A, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('s'), 0x015B, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('S'), 0x015C, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('s'), 0x015D, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('S'), 0x015E, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('s'), 0x015F, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('S'), 0x0160, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('s'), 0x0161, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('T'), 0x0162, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('t'), 0x0163, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('T'), 0x0164, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('t'), 0x0165, DirectionBoth},
    {QLatin1Char('B'), QLatin1Char('T'), 0x0166, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('t'), 0x0167, DirectionCommandToUnicode},
    {QLatin1Char('~'), QLatin1Char('U'), 0x0168, DirectionBoth},
    {QLatin1Char('~'), QLatin1Char('u'), 0x0169, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('U'), 0x016A, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('u'), 0x016B, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('U'), 0x016C, DirectionBoth},
    {QLatin1Char('u'), QLatin1Char('u'), 0x016D, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('U'), 0x016E, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('u'), 0x016F, DirectionBoth},
    {QLatin1Char('H'), QLatin1Char('U'), 0x0170, DirectionBoth},
    {QLatin1Char('H'), QLatin1Char('u'), 0x0171, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('U'), 0x0172, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('u'), 0x0173, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('W'), 0x0174, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('w'), 0x0175, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('Y'), 0x0176, DirectionBoth},
    {QLatin1Char('^'), QLatin1Char('y'), 0x0177, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('Y'), 0x0178, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('Z'), 0x0179, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('z'), 0x017A, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('Z'), 0x017B, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('z'), 0x017C, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('Z'), 0x017D, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('z'), 0x017E, DirectionBoth},
    /** 0x017F: TODO long s */
    {QLatin1Char('B'), QLatin1Char('b'), 0x0180, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('B'), 0x0181, DirectionCommandToUnicode},
    /** 0x0182 */
    /** 0x0183 */
    /** 0x0184 */
    /** 0x0185 */
    {QLatin1Char('m'), QLatin1Char('O'), 0x0186, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('C'), 0x0187, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('c'), 0x0188, DirectionCommandToUnicode},
    {QLatin1Char('M'), QLatin1Char('D'), 0x0189, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('D'), 0x018A, DirectionCommandToUnicode},
    /** 0x018B */
    /** 0x018C */
    /** 0x018D */
    {QLatin1Char('M'), QLatin1Char('E'), 0x018E, DirectionCommandToUnicode},
    /** 0x018F */
    {QLatin1Char('m'), QLatin1Char('E'), 0x0190, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('F'), 0x0191, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('f'), 0x0192, DirectionCommandToUnicode},
    /** 0x0193 */
    {QLatin1Char('m'), QLatin1Char('G'), 0x0194, DirectionCommandToUnicode},
    /** 0x0195: see EncoderLaTeXCharacterCommand */
    {QLatin1Char('m'), QLatin1Char('I'), 0x0196, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('I'), 0x0197, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('K'), 0x0198, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('k'), 0x0199, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('l'), 0x019A, DirectionCommandToUnicode},
    /** 0x019B */
    /** 0x019C */
    {QLatin1Char('m'), QLatin1Char('J'), 0x019D, DirectionCommandToUnicode},
    /** 0x019E */
    /** 0x019F */
    /** 0x01A0 */
    /** 0x01A1 */
    /** 0x01A2 */
    /** 0x01A3 */
    {QLatin1Char('m'), QLatin1Char('P'), 0x01A4, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('p'), 0x01A5, DirectionCommandToUnicode},
    /** 0x01A6 */
    /** 0x01A7 */
    /** 0x01A8 */
    /** 0x01A9: see EncoderLaTeXCharacterCommand */
    /** 0x01AA */
    /** 0x01AB */
    {QLatin1Char('m'), QLatin1Char('T'), 0x01AC, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('t'), 0x01AD, DirectionCommandToUnicode},
    {QLatin1Char('M'), QLatin1Char('T'), 0x01AE, DirectionCommandToUnicode},
    /** 0x01AF */
    /** 0x01B0 */
    {QLatin1Char('m'), QLatin1Char('U'), 0x01B1, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('V'), 0x01B2, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('Y'), 0x01B3, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('y'), 0x01B4, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('Z'), 0x01B5, DirectionCommandToUnicode},
    {QLatin1Char('B'), QLatin1Char('z'), 0x01B6, DirectionCommandToUnicode},
    {QLatin1Char('m'), QLatin1Char('Z'), 0x01B7, DirectionCommandToUnicode},
    /** 0x01B8 */
    /** 0x01B9 */
    /** 0x01BA */
    {QLatin1Char('B'), QLatin1Char('2'), 0x01BB, DirectionCommandToUnicode},
    /** 0x01BC */
    /** 0x01BD */
    /** 0x01BE */
    /** 0x01BF */
    /** 0x01C0 */
    /** 0x01C1 */
    /** 0x01C2 */
    /** 0x01C3 */
    /** 0x01C4 */
    /** 0x01C5 */
    /** 0x01C6 */
    /** 0x01C7 */
    /** 0x01C8 */
    /** 0x01C9 */
    /** 0x01CA */
    /** 0x01CB */
    /** 0x01CC */
    {QLatin1Char('v'), QLatin1Char('A'), 0x01CD, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('a'), 0x01CE, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('G'), 0x01E6, DirectionBoth},
    {QLatin1Char('v'), QLatin1Char('g'), 0x01E7, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('O'), 0x01EA, DirectionBoth},
    {QLatin1Char('k'), QLatin1Char('o'), 0x01EB, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('G'), 0x01F4, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('g'), 0x01F5, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('A'), 0x0226, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('a'), 0x0227, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('E'), 0x0228, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('e'), 0x0229, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('Y'), 0x0232, DirectionBoth},
    {QLatin1Char('='), QLatin1Char('y'), 0x0233, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('O'), 0x022E, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('o'), 0x022F, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('B'), 0x1E02, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('b'), 0x1E03, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('B'), 0x1E04, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('b'), 0x1E05, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('D'), 0x1E0A, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('d'), 0x1E0B, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('D'), 0x1E0C, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('d'), 0x1E0D, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('D'), 0x1E10, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('d'), 0x1E11, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('F'), 0x1E1E, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('f'), 0x1E1F, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('H'), 0x1E22, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('h'), 0x1E23, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('H'), 0x1E24, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('h'), 0x1E25, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('H'), 0x1E26, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('h'), 0x1E27, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('H'), 0x1E28, DirectionBoth},
    {QLatin1Char('c'), QLatin1Char('h'), 0x1E29, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('K'), 0x1E32, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('k'), 0x1E33, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('L'), 0x1E36, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('l'), 0x1E37, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('M'), 0x1E40, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('m'), 0x1E41, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('M'), 0x1E42, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('m'), 0x1E43, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('N'), 0x1E44, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('n'), 0x1E45, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('N'), 0x1E46, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('n'), 0x1E47, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('P'), 0x1E56, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('p'), 0x1E57, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('R'), 0x1E58, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('r'), 0x1E59, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('R'), 0x1E5A, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('r'), 0x1E5B, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('S'), 0x1E60, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('s'), 0x1E61, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('S'), 0x1E62, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('s'), 0x1E63, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('T'), 0x1E6A, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('t'), 0x1E6B, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('T'), 0x1E6C, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('t'), 0x1E6D, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('V'), 0x1E7E, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('v'), 0x1E7F, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('W'), 0x1E80, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('w'), 0x1E81, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('W'), 0x1E82, DirectionBoth},
    {QLatin1Char('\''), QLatin1Char('w'), 0x1E83, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('W'), 0x1E84, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('w'), 0x1E85, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('W'), 0x1E86, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('w'), 0x1E87, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('W'), 0x1E88, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('w'), 0x1E89, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('X'), 0x1E8A, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('x'), 0x1E8B, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('X'), 0x1E8C, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('x'), 0x1E8D, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('Y'), 0x1E8E, DirectionBoth},
    {QLatin1Char('.'), QLatin1Char('y'), 0x1E8F, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('Z'), 0x1E92, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('z'), 0x1E93, DirectionBoth},
    {QLatin1Char('"'), QLatin1Char('t'), 0x1E97, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('w'), 0x1E98, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('y'), 0x1E99, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('A'), 0x1EA0, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('a'), 0x1EA1, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('E'), 0x1EB8, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('e'), 0x1EB9, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('I'), 0x1ECA, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('i'), 0x1ECB, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('O'), 0x1ECC, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('o'), 0x1ECD, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('U'), 0x1EE4, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('u'), 0x1EE5, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('Y'), 0x1EF2, DirectionBoth},
    {QLatin1Char('`'), QLatin1Char('y'), 0x1EF3, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('Y'), 0x1EF4, DirectionBoth},
    {QLatin1Char('d'), QLatin1Char('y'), 0x1EF5, DirectionBoth},
    {QLatin1Char('r'), QLatin1Char('q'), 0x2019, DirectionCommandToUnicode} ///< tricky: this is \rq
};

/**
 * This structure contains information on the usage of dotless i
 * and dotless j in combination with accent-like modifiers.
 * Combinations such as \"{\i} are translated to a Unicode character
 * and back. The structure is a table with three columns: (1) the
 * modifier (in the example above, the quotation mark), (2) the ASCII
 * character (in the example above, the 'i'), (3) the Unicode
 * character described by a hexcode.
 */
// TODO other cases of \i and \j?
static const struct DotlessIJCharacter { const QChar modifier; const QChar letter; const ushort unicode; const EncoderLaTeXCommandDirection direction; } dotlessIJCharacters[] = { {QLatin1Char('`'), QLatin1Char('i'), 0x00EC, DirectionBoth}, {QLatin1Char('\''), QLatin1Char('i'), 0x00ED, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('i'), 0x00EE, DirectionBoth}, {QLatin1Char('"'), QLatin1Char('i'), 0x00EF, DirectionBoth}, {QLatin1Char('~'), QLatin1Char('i'), 0x0129, DirectionBoth}, {QLatin1Char('='), QLatin1Char('i'), 0x012B, DirectionBoth}, {QLatin1Char('u'), QLatin1Char('i'), 0x012D, DirectionBoth}, {QLatin1Char('k'), QLatin1Char('i'), 0x012F, DirectionBoth}, {QLatin1Char('^'), QLatin1Char('j'), 0x0135, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('i'), 0x01D0, DirectionBoth}, {QLatin1Char('v'), QLatin1Char('j'), 0x01F0, DirectionBoth}, {QLatin1Char('G'), QLatin1Char('i'), 0x0209, DirectionCommandToUnicode} }; /** * This lookup allows to quickly find hits in the * EncoderLaTeXEscapedCharacter table. This data structure here * consists of a number of rows. Each row consists of a * modifier (like '"' or 'v') and an array of Unicode chars. * Letters 'A'..'Z','a'..'z','0'..'9' are used as index to this * array by invocing asciiLetterOrDigitToPos(). * This data structure is built in the constructor. */ static const int lookupTableNumModifiers = 32; static const int lookupTableNumCharacters = 26 * 2 + 10; static struct EncoderLaTeXEscapedCharacterLookupTableRow { QChar modifier; QChar unicode[lookupTableNumCharacters]; } *lookupTable[lookupTableNumModifiers]; /** * This data structure keeps track of math commands, which * have to be treated differently in text and math mode. * The math command like "subset of" could be used directly * in math mode, but must be enclosed in \ensuremath{...} * in text mode. 
*/ static const struct MathCommand { const QString command; const ushort unicode; const EncoderLaTeXCommandDirection direction; } mathCommands[] = { {QStringLiteral("pm"), 0x00B1, DirectionBoth}, {QStringLiteral("times"), 0x00D7, DirectionBoth}, {QStringLiteral("div"), 0x00F7, DirectionBoth}, {QStringLiteral("phi"), 0x0278, DirectionBoth}, ///< see also 0x03C6 (GREEK SMALL LETTER PHI) {QStringLiteral("Alpha"), 0x0391, DirectionBoth}, {QStringLiteral("Beta"), 0x0392, DirectionBoth}, {QStringLiteral("Gamma"), 0x0393, DirectionBoth}, {QStringLiteral("Delta"), 0x0394, DirectionBoth}, {QStringLiteral("Epsilon"), 0x0395, DirectionBoth}, {QStringLiteral("Zeta"), 0x0396, DirectionBoth}, {QStringLiteral("Eta"), 0x0397, DirectionBoth}, {QStringLiteral("Theta"), 0x0398, DirectionBoth}, {QStringLiteral("Iota"), 0x0399, DirectionBoth}, {QStringLiteral("Kappa"), 0x039A, DirectionBoth}, {QStringLiteral("Lamda"), 0x039B, DirectionCommandToUnicode}, ///< \Lamda does not exist, this is mostly for spelling errors {QStringLiteral("Lambda"), 0x039B, DirectionBoth}, {QStringLiteral("Mu"), 0x039C, DirectionBoth}, {QStringLiteral("Nu"), 0x039D, DirectionBoth}, {QStringLiteral("Xi"), 0x039E, DirectionBoth}, {QStringLiteral("Omicron"), 0x039F, DirectionBoth}, {QStringLiteral("Pi"), 0x03A0, DirectionBoth}, {QStringLiteral("Rho"), 0x03A1, DirectionBoth}, {QStringLiteral("Sigma"), 0x03A3, DirectionBoth}, {QStringLiteral("Tau"), 0x03A4, DirectionBoth}, {QStringLiteral("Upsilon"), 0x03A5, DirectionBoth}, {QStringLiteral("Phi"), 0x03A6, DirectionBoth}, {QStringLiteral("Chi"), 0x03A7, DirectionBoth}, {QStringLiteral("Psi"), 0x03A8, DirectionBoth}, {QStringLiteral("Omega"), 0x03A9, DirectionBoth}, {QStringLiteral("alpha"), 0x03B1, DirectionBoth}, {QStringLiteral("beta"), 0x03B2, DirectionBoth}, {QStringLiteral("gamma"), 0x03B3, DirectionBoth}, {QStringLiteral("delta"), 0x03B4, DirectionBoth}, {QStringLiteral("varepsilon"), 0x03B5, DirectionBoth}, {QStringLiteral("zeta"), 0x03B6, DirectionBoth}, 
{QStringLiteral("eta"), 0x03B7, DirectionBoth}, {QStringLiteral("theta"), 0x03B8, DirectionBoth}, {QStringLiteral("iota"), 0x03B9, DirectionBoth}, {QStringLiteral("kappa"), 0x03BA, DirectionBoth}, {QStringLiteral("lamda"), 0x03BB, DirectionBoth}, ///< \lamda does not exist, this is mostly for spelling errors {QStringLiteral("lambda"), 0x03BB, DirectionBoth}, {QStringLiteral("mu"), 0x03BC, DirectionBoth}, {QStringLiteral("nu"), 0x03BD, DirectionBoth}, {QStringLiteral("xi"), 0x03BE, DirectionBoth}, {QStringLiteral("omicron"), 0x03BF, DirectionBoth}, {QStringLiteral("pi"), 0x03C0, DirectionBoth}, {QStringLiteral("rho"), 0x03C1, DirectionBoth}, {QStringLiteral("varsigma"), 0x03C2, DirectionBoth}, {QStringLiteral("sigma"), 0x03C3, DirectionBoth}, {QStringLiteral("tau"), 0x03C4, DirectionBoth}, {QStringLiteral("upsilon"), 0x03C5, DirectionBoth}, {QStringLiteral("varphi"), 0x03C6, DirectionBoth}, ///< see also 0x0278 (LATIN SMALL LETTER PHI) {QStringLiteral("chi"), 0x03C7, DirectionBoth}, {QStringLiteral("psi"), 0x03C8, DirectionBoth}, {QStringLiteral("omega"), 0x03C9, DirectionBoth}, {QStringLiteral("vartheta"), 0x03D1, DirectionBoth}, {QStringLiteral("varpi"), 0x03D6, DirectionBoth}, {QStringLiteral("digamma"), 0x03DC, DirectionBoth}, {QStringLiteral("varkappa"), 0x03F0, DirectionBoth}, {QStringLiteral("varrho"), 0x03F1, DirectionBoth}, {QStringLiteral("epsilon"), 0x03F5, DirectionBoth}, {QStringLiteral("backepsilon"), 0x03F6, DirectionBoth}, {QStringLiteral("aleph"), 0x05D0, DirectionBoth}, {QStringLiteral("dagger"), 0x2020, DirectionBoth}, {QStringLiteral("ddagger"), 0x2021, DirectionBoth}, {QStringLiteral("mathbb{C}"), 0x2102, DirectionBoth}, {QStringLiteral("ell"), 0x2113, DirectionBoth}, {QStringLiteral("mho"), 0x2127, DirectionBoth}, {QStringLiteral("beth"), 0x2136, DirectionBoth}, {QStringLiteral("gimel"), 0x2137, DirectionBoth}, {QStringLiteral("daleth"), 0x2138, DirectionBoth}, {QStringLiteral("rightarrow"), 0x2192, DirectionBoth}, {QStringLiteral("forall"), 
0x2200, DirectionBoth},
    {QStringLiteral("complement"), 0x2201, DirectionBoth},
    {QStringLiteral("partial"), 0x2202, DirectionBoth},
    {QStringLiteral("exists"), 0x2203, DirectionBoth},
    {QStringLiteral("nexists"), 0x2204, DirectionBoth},
    {QStringLiteral("varnothing"), 0x2205, DirectionBoth},
    {QStringLiteral("nabla"), 0x2207, DirectionBoth},
    {QStringLiteral("in"), 0x2208, DirectionBoth},
    {QStringLiteral("notin"), 0x2209, DirectionBoth},
    {QStringLiteral("ni"), 0x220B, DirectionBoth},
    {QStringLiteral("not\\ni"), 0x220C, DirectionBoth},
    {QStringLiteral("asterisk"), 0x2217, DirectionCommandToUnicode},
    {QStringLiteral("infty"), 0x221E, DirectionBoth},
    {QStringLiteral("leq"), 0x2264, DirectionBoth},
    {QStringLiteral("geq"), 0x2265, DirectionBoth},
    {QStringLiteral("lneq"), 0x2268, DirectionBoth},
    {QStringLiteral("gneq"), 0x2269, DirectionBoth},
    {QStringLiteral("ll"), 0x226A, DirectionBoth},
    {QStringLiteral("gg"), 0x226B, DirectionBoth},
    {QStringLiteral("nless"), 0x226E, DirectionBoth},
    {QStringLiteral("ngtr"), 0x226F, DirectionBoth},
    {QStringLiteral("nleq"), 0x2270, DirectionBoth},
    {QStringLiteral("ngeq"), 0x2271, DirectionBoth},
    {QStringLiteral("subset"), 0x2282, DirectionBoth},
    {QStringLiteral("supset"), 0x2283, DirectionBoth},
    {QStringLiteral("subseteq"), 0x2286, DirectionBoth},
    {QStringLiteral("supseteq"), 0x2287, DirectionBoth},
    {QStringLiteral("nsubseteq"), 0x2288, DirectionBoth},
    {QStringLiteral("nsupseteq"), 0x2289, DirectionBoth},
    {QStringLiteral("subsetneq"), 0x228A, DirectionBoth},
    {QStringLiteral("supsetneq"), 0x228B, DirectionBoth}, ///< U+228B SUPERSET OF WITH NOT EQUAL TO; U+228A belongs to \subsetneq
    {QStringLiteral("Subset"), 0x22D0, DirectionBoth},
    {QStringLiteral("Supset"), 0x22D1, DirectionBoth},
    {QStringLiteral("lll"), 0x22D8, DirectionBoth},
    {QStringLiteral("ggg"), 0x22D9, DirectionBoth},
    {QStringLiteral("top"), 0x22A4, DirectionBoth},
    {QStringLiteral("bot"), 0x22A5, DirectionBoth},
};

/**
 * This data structure holds commands representing a single
 * character. For example, it maps \AA to A with a ring (Nordic
 * letter) and back. The structure is a table with three columns:
 * (1) the command's name without a backslash (in the example
 * before the 'AA'), (2) the Unicode character described by a
 * hexcode, and (3) the translation direction(s) the row is
 * meant for (see EncoderLaTeXCommandDirection).
 *
 * Command names must not contain leading or trailing blanks:
 * decode() compares them for equality against the text returned
 * by readAlphaCharacters(), which presumably never contains a
 * blank, so a padded name could never match.
 *
 * The table is scanned front to back and the first matching row
 * wins, so if a command name is listed more than once only its
 * first row is ever used when decoding.
 */
static const struct EncoderLaTeXCharacterCommand {
    const QString command;
    const ushort unicode;
    const EncoderLaTeXCommandDirection direction;
}
encoderLaTeXCharacterCommands[] = {
    {QStringLiteral("textexclamdown"), 0x00A1, DirectionCommandToUnicode},
    {QStringLiteral("textcent"), 0x00A2, DirectionBoth},
    {QStringLiteral("pounds"), 0x00A3, DirectionBoth},
    {QStringLiteral("textsterling"), 0x00A3, DirectionBoth},
    /** 0x00A4 */
    {QStringLiteral("textyen"), 0x00A5, DirectionBoth},
    {QStringLiteral("textbrokenbar"), 0x00A6, DirectionBoth},
    {QStringLiteral("S"), 0x00A7, DirectionBoth},
    {QStringLiteral("textsection"), 0x00A7, DirectionBoth},
    /** 0x00A8 */
    {QStringLiteral("copyright"), 0x00A9, DirectionBoth},
    {QStringLiteral("textcopyright"), 0x00A9, DirectionBoth},
    {QStringLiteral("textordfeminine"), 0x00AA, DirectionBoth},
    {QStringLiteral("guillemotleft"), 0x00AB, DirectionCommandToUnicode},
    {QStringLiteral("textflqq"), 0x00AB, DirectionCommandToUnicode},
    {QStringLiteral("flqq"), 0x00AB, DirectionBoth},
    /** 0x00AC */
    /** 0x00AD */
    {QStringLiteral("textregistered"), 0x00AE, DirectionBoth},
    /** 0x00AF */
    {QStringLiteral("textdegree"), 0x00B0, DirectionBoth},
    {QStringLiteral("textpm"), 0x00B1, DirectionBoth},
    {QStringLiteral("textplusminus"), 0x00B1, DirectionCommandToUnicode},
    /** 0x00B2 */
    /** 0x00B3 */
    /** 0x00B4 */
    {QStringLiteral("textmu"), 0x00B5, DirectionBoth},
    {QStringLiteral("textparagraph"), 0x00B6, DirectionBoth},
    {QStringLiteral("textpilcrow"), 0x00B6, DirectionBoth},
    {QStringLiteral("textperiodcentered"), 0x00B7, DirectionCommandToUnicode},
    {QStringLiteral("textcdot"), 0x00B7, DirectionBoth},
    {QStringLiteral("textcentereddot"), 0x00B7, DirectionCommandToUnicode},
    /** 0x00B8 */
    /** 0x00B9 */
    {QStringLiteral("textordmasculine"), 0x00BA, DirectionBoth},
    {QStringLiteral("guillemotright"), 0x00BB, DirectionCommandToUnicode},
    {QStringLiteral("textfrqq"), 0x00BB, DirectionCommandToUnicode},
    {QStringLiteral("frqq"), 0x00BB, DirectionBoth},
    {QStringLiteral("textonequarter"), 0x00BC, DirectionBoth},
    {QStringLiteral("textonehalf"), 0x00BD, DirectionBoth},
    {QStringLiteral("textthreequarters"), 0x00BE, DirectionBoth},
    {QStringLiteral("textquestiondown"), 0x00BF, DirectionCommandToUnicode}, // TODO /// recommended to write ?` instead of \textquestiondown
    {QStringLiteral("AA"), 0x00C5, DirectionBoth},
    {QStringLiteral("AE"), 0x00C6, DirectionBoth},
    {QStringLiteral("DH"), 0x00D0, DirectionBoth},
    {QStringLiteral("texttimes"), 0x00D7, DirectionBoth},
    {QStringLiteral("textmultiply"), 0x00D7, DirectionCommandToUnicode},
    {QStringLiteral("O"), 0x00D8, DirectionBoth},
    {QStringLiteral("TH"), 0x00DE, DirectionBoth},
    {QStringLiteral("Thorn"), 0x00DE, DirectionCommandToUnicode},
    {QStringLiteral("textThorn"), 0x00DE, DirectionCommandToUnicode},
    {QStringLiteral("ss"), 0x00DF, DirectionBoth},
    {QStringLiteral("aa"), 0x00E5, DirectionBoth},
    {QStringLiteral("ae"), 0x00E6, DirectionBoth},
    {QStringLiteral("dh"), 0x00F0, DirectionBoth},
    {QStringLiteral("textdiv"), 0x00F7, DirectionBoth},
    {QStringLiteral("textdivide"), 0x00F7, DirectionCommandToUnicode},
    {QStringLiteral("o"), 0x00F8, DirectionBoth},
    {QStringLiteral("th"), 0x00FE, DirectionBoth},
    {QStringLiteral("textthorn"), 0x00FE, DirectionCommandToUnicode},
    {QStringLiteral("textthornvari"), 0x00FE, DirectionCommandToUnicode},
    {QStringLiteral("textthornvarii"), 0x00FE, DirectionCommandToUnicode},
    {QStringLiteral("textthornvariii"), 0x00FE, DirectionCommandToUnicode},
    {QStringLiteral("textthornvariv"), 0x00FE, DirectionCommandToUnicode},
    {QStringLiteral("Aogonek"), 0x0104, DirectionCommandToUnicode},
    {QStringLiteral("aogonek"), 0x0105, DirectionCommandToUnicode},
    {QStringLiteral("DJ"), 0x0110, DirectionBoth},
    {QStringLiteral("dj"), 0x0111, DirectionBoth},
    {QStringLiteral("textcrd"), 0x0111, DirectionCommandToUnicode},
    {QStringLiteral("textHslash"), 0x0126, DirectionCommandToUnicode},
    {QStringLiteral("textHbar"), 0x0126, DirectionCommandToUnicode},
    {QStringLiteral("textcrh"), 0x0127, DirectionCommandToUnicode},
    {QStringLiteral("texthbar"), 0x0127, DirectionCommandToUnicode},
    {QStringLiteral("i"), 0x0131, DirectionBoth},
    {QStringLiteral("IJ"), 0x0132, DirectionBoth},
    {QStringLiteral("ij"), 0x0133, DirectionBoth},
    {QStringLiteral("textkra"), 0x0138, DirectionCommandToUnicode},
    {QStringLiteral("Lcaron"), 0x013D, DirectionCommandToUnicode},
    {QStringLiteral("lcaron"), 0x013E, DirectionCommandToUnicode},
    {QStringLiteral("L"), 0x0141, DirectionBoth},
    {QStringLiteral("Lstroke"), 0x0141, DirectionCommandToUnicode},
    {QStringLiteral("l"), 0x0142, DirectionBoth},
    {QStringLiteral("lstroke"), 0x0142, DirectionCommandToUnicode},
    {QStringLiteral("textbarl"), 0x0142, DirectionCommandToUnicode},
    {QStringLiteral("NG"), 0x014A, DirectionBoth},
    {QStringLiteral("ng"), 0x014B, DirectionBoth},
    {QStringLiteral("OE"), 0x0152, DirectionBoth},
    {QStringLiteral("oe"), 0x0153, DirectionBoth},
    {QStringLiteral("Racute"), 0x0154, DirectionCommandToUnicode},
    {QStringLiteral("racute"), 0x0155, DirectionCommandToUnicode},
    {QStringLiteral("Sacute"), 0x015A, DirectionCommandToUnicode},
    {QStringLiteral("sacute"), 0x015B, DirectionCommandToUnicode},
    {QStringLiteral("Scedilla"), 0x015E, DirectionCommandToUnicode},
    {QStringLiteral("scedilla"), 0x015F, DirectionCommandToUnicode},
    {QStringLiteral("Scaron"), 0x0160, DirectionCommandToUnicode},
    {QStringLiteral("scaron"), 0x0161, DirectionCommandToUnicode},
    {QStringLiteral("Tcaron"), 0x0164, DirectionCommandToUnicode},
    {QStringLiteral("tcaron"), 0x0165, DirectionCommandToUnicode},
    {QStringLiteral("textTstroke"), 0x0166, DirectionCommandToUnicode},
    {QStringLiteral("textTbar"), 0x0166, DirectionCommandToUnicode},
    {QStringLiteral("textTslash"), 0x0166, DirectionCommandToUnicode},
    {QStringLiteral("texttstroke"), 0x0167, DirectionCommandToUnicode},
    {QStringLiteral("texttbar"), 0x0167, DirectionCommandToUnicode},
    {QStringLiteral("texttslash"), 0x0167, DirectionCommandToUnicode},
    {QStringLiteral("Zdotaccent"), 0x017B, DirectionCommandToUnicode},
    {QStringLiteral("zdotaccent"), 0x017C, DirectionCommandToUnicode},
    {QStringLiteral("Zcaron"), 0x017D, DirectionCommandToUnicode},
    {QStringLiteral("zcaron"), 0x017E, DirectionCommandToUnicode},
    {QStringLiteral("textlongs"), 0x017F, DirectionCommandToUnicode},
    {QStringLiteral("textcrb"), 0x0180, DirectionCommandToUnicode},
    {QStringLiteral("textBhook"), 0x0181, DirectionCommandToUnicode},
    {QStringLiteral("texthausaB"), 0x0181, DirectionCommandToUnicode},
    {QStringLiteral("textOopen"), 0x0186, DirectionCommandToUnicode},
    {QStringLiteral("textChook"), 0x0187, DirectionCommandToUnicode},
    {QStringLiteral("textchook"), 0x0188, DirectionCommandToUnicode},
    {QStringLiteral("texthtc"), 0x0188, Direct­ionCommandToUnicode},
    {QStringLiteral("textDafrican"), 0x0189, DirectionCommandToUnicode},
    {QStringLiteral("textDhook"), 0x018A, DirectionCommandToUnicode},
    {QStringLiteral("texthausaD"), 0x018A, DirectionCommandToUnicode},
    {QStringLiteral("textEreversed"), 0x018E, DirectionCommandToUnicode},
    {QStringLiteral("textrevE"), 0x018E, DirectionCommandToUnicode},
    {QStringLiteral("textEopen"), 0x0190, DirectionCommandToUnicode},
    {QStringLiteral("textFhook"), 0x0191, DirectionCommandToUnicode},
    {QStringLiteral("textflorin"), 0x0192, DirectionBoth},
    {QStringLiteral("textgamma"), 0x0194, DirectionCommandToUnicode},
    {QStringLiteral("textGammaafrican"), 0x0194, DirectionCommandToUnicode},
    {QStringLiteral("hv"), 0x0195, DirectionCommandToUnicode},
    {QStringLiteral("texthvlig"), 0x0195, DirectionCommandToUnicode},
    {QStringLiteral("textIotaafrican"), 0x0196, DirectionCommandToUnicode},
    {QStringLiteral("textKhook"), 0x0198, DirectionCommandToUnicode},
    {QStringLiteral("texthausaK"), 0x0198, DirectionCommandToUnicode},
    {QStringLiteral("texthtk"), 0x0199, DirectionCommandToUnicode},
    {QStringLiteral("textkhook"), 0x0199, DirectionCommandToUnicode},
    /// NOTE(review): "textbarl" is already listed above for 0x0142, so a
    /// first-match scan never reaches this row -- confirm which code
    /// point is intended before removing either row
    {QStringLiteral("textbarl"), 0x019A, DirectionCommandToUnicode},
    {QStringLiteral("textcrlambda"), 0x019B, DirectionCommandToUnicode},
    {QStringLiteral("textNhookleft"), 0x019D, DirectionCommandToUnicode},
    {QStringLiteral("textnrleg"), 0x019E, DirectionCommandToUnicode},
    {QStringLiteral("textPUnrleg"), 0x019E, DirectionCommandToUnicode},
    {QStringLiteral("Ohorn"), 0x01A0, DirectionCommandToUnicode},
    {QStringLiteral("ohorn"), 0x01A1, DirectionCommandToUnicode},
    {QStringLiteral("textPhook"), 0x01A4, DirectionCommandToUnicode},
    {QStringLiteral("texthtp"), 0x01A5, DirectionCommandToUnicode},
    {QStringLiteral("textphook"), 0x01A5, DirectionCommandToUnicode},
    {QStringLiteral("ESH"), 0x01A9, DirectionCommandToUnicode},
    {QStringLiteral("textEsh"), 0x01A9, DirectionCommandToUnicode},
    {QStringLiteral("textlooptoprevsh"), 0x01AA, DirectionCommandToUnicode},
    {QStringLiteral("textlhtlongi"), 0x01AA, DirectionCommandToUnicode},
    {QStringLiteral("textlhookt"), 0x01AB, DirectionCommandToUnicode},
    {QStringLiteral("textThook"), 0x01AC, DirectionCommandToUnicode},
    {QStringLiteral("textthook"), 0x01AD, DirectionCommandToUnicode},
    {QStringLiteral("texthtt"), 0x01AD, DirectionCommandToUnicode},
    {QStringLiteral("textTretroflexhook"), 0x01AE, DirectionCommandToUnicode},
    {QStringLiteral("Uhorn"), 0x01AF, DirectionCommandToUnicode},
    {QStringLiteral("uhorn"), 0x01B0, DirectionCommandToUnicode},
    {QStringLiteral("textupsilon"), 0x01B1, DirectionCommandToUnicode},
    {QStringLiteral("textVhook"), 0x01B2, DirectionCommandToUnicode},
    {QStringLiteral("textYhook"), 0x01B3, DirectionCommandToUnicode},
    {QStringLiteral("textvhook"), 0x01B4, DirectionCommandToUnicode},
    {QStringLiteral("Zbar"), 0x01B5, DirectionCommandToUnicode},
    {QStringLiteral("zbar"), 0x01B6, DirectionCommandToUnicode},
    {QStringLiteral("EZH"), 0x01B7, DirectionCommandToUnicode},
    {QStringLiteral("textEzh"), 0x01B7, DirectionCommandToUnicode},
    {QStringLiteral("LJ"), 0x01C7, DirectionCommandToUnicode},
    {QStringLiteral("Lj"), 0x01C8, DirectionCommandToUnicode},
    {QStringLiteral("lj"), 0x01C9, DirectionCommandToUnicode},
    {QStringLiteral("NJ"), 0x01CA, DirectionCommandToUnicode},
    {QStringLiteral("Nj"), 0x01CB, DirectionCommandToUnicode},
    {QStringLiteral("nj"), 0x01CC, DirectionCommandToUnicode},
    {QStringLiteral("DZ"), 0x01F1, DirectionCommandToUnicode},
    {QStringLiteral("Dz"), 0x01F2, DirectionCommandToUnicode},
    {QStringLiteral("dz"), 0x01F3, DirectionCommandToUnicode},
    {QStringLiteral("HV"), 0x01F6, DirectionCommandToUnicode},
    {QStringLiteral("j"), 0x0237, DirectionBoth},
    {QStringLiteral("ldots"), 0x2026, DirectionBoth},
    {QStringLiteral("grqq"), 0x201C, DirectionCommandToUnicode},
    {QStringLiteral("textquotedblleft"), 0x201C, DirectionCommandToUnicode},
    {QStringLiteral("rqq"), 0x201D, DirectionCommandToUnicode},
    {QStringLiteral("textquotedblright"), 0x201D, DirectionCommandToUnicode},
    {QStringLiteral("glqq"), 0x201E, DirectionCommandToUnicode},
    {QStringLiteral("SS"), 0x1E9E, DirectionBoth},
    {QStringLiteral("textendash"), 0x2013, DirectionCommandToUnicode},
    {QStringLiteral("textemdash"), 0x2014, DirectionCommandToUnicode},
    {QStringLiteral("textquoteleft"), 0x2018, DirectionCommandToUnicode},
    {QStringLiteral("lq"), 0x2018, DirectionBoth},
    {QStringLiteral("textquoteright"), 0x2019, DirectionCommandToUnicode},
    {QStringLiteral("rq"), 0x2019, DirectionBoth}, ///< tricky one: 'r' is a valid modifier
    {QStringLiteral("quotesinglbase"), 0x201A, DirectionBoth},
    {QStringLiteral("quotedblbase"), 0x201E, DirectionBoth},
    {QStringLiteral("textbullet"), 0x2022, DirectionBoth},
    {QStringLiteral("guilsinglleft"), 0x2039, DirectionBoth},
    {QStringLiteral("guilsinglright"), 0x203A, DirectionBoth},
    {QStringLiteral("textcelsius"), 0x2103, DirectionBoth},
    {QStringLiteral("textleftarrow"), 0x2190, DirectionBoth},
    {QStringLiteral("textuparrow"), 0x2191, DirectionBoth},
    {QStringLiteral("textrightarrow"),
0x2192, DirectionBoth}, {QStringLiteral("textdownarrow"), 0x2193, DirectionBoth} }; const QChar EncoderLaTeX::encoderLaTeXProtectedSymbols[] = {QLatin1Char('#'), QLatin1Char('&'), QLatin1Char('%')}; const QChar EncoderLaTeX::encoderLaTeXProtectedTextOnlySymbols[] = {QLatin1Char('_')}; /** * This data structure holds LaTeX symbol sequences (without * any backslash) that represent a single Unicode character. * For example, it maps --- to an 'em dash' and back. * The structure is a table with two columns: (1) the symbol * sequence (in the example before the '---') (2) the Unicode * character described by a hexcode. */ static const struct EncoderLaTeXSymbolSequence { const QString latex; const ushort unicode; const EncoderLaTeXCommandDirection direction; } encoderLaTeXSymbolSequences[] = { {QStringLiteral("!`"), 0x00A1, DirectionBoth}, {QStringLiteral("\"<"), 0x00AB, DirectionBoth}, {QStringLiteral("\">"), 0x00BB, DirectionBoth}, {QStringLiteral("?`"), 0x00BF, DirectionBoth}, {QStringLiteral("---"), 0x2014, DirectionBoth}, ///< --- must come before -- {QStringLiteral("--"), 0x2013, DirectionBoth}, {QStringLiteral("``"), 0x201C, DirectionBoth}, {QStringLiteral("''"), 0x201D, DirectionBoth}, {QStringLiteral("ff"), 0xFB00, DirectionUnicodeToCommand}, {QStringLiteral("fi"), 0xFB01, DirectionUnicodeToCommand}, {QStringLiteral("fl"), 0xFB02, DirectionUnicodeToCommand}, {QStringLiteral("ffi"), 0xFB03, DirectionUnicodeToCommand}, {QStringLiteral("ffl"), 0xFB04, DirectionUnicodeToCommand}, {QStringLiteral("ft"), 0xFB05, DirectionUnicodeToCommand}, {QStringLiteral("st"), 0xFB06, DirectionUnicodeToCommand} }; EncoderLaTeX::EncoderLaTeX() : Encoder() { /// Initialize lookup table with NULL pointers for (int i = 0; i < lookupTableNumModifiers; ++i) lookupTable[i] = nullptr; int lookupTableCount = 0; /// Go through all table rows of encoderLaTeXEscapedCharacters for (const EncoderLaTeXEscapedCharacter &encoderLaTeXEscapedCharacter : encoderLaTeXEscapedCharacters) { /// Check if this 
row's modifier is already known bool knownModifier = false; int j; for (j = lookupTableCount - 1; j >= 0; --j) { knownModifier |= lookupTable[j]->modifier == encoderLaTeXEscapedCharacter.modifier; if (knownModifier) break; } if (!knownModifier) { /// Ok, this row's modifier appeared for the first time, /// therefore initialize memory structure, i.e. row in lookupTable lookupTable[lookupTableCount] = new EncoderLaTeXEscapedCharacterLookupTableRow; lookupTable[lookupTableCount]->modifier = encoderLaTeXEscapedCharacter.modifier; /// If no special character is known for a letter+modifier /// combination, fall back using the ASCII character only for (ushort k = 0; k < 26; ++k) { lookupTable[lookupTableCount]->unicode[k] = QChar(QLatin1Char('A').unicode() + k); lookupTable[lookupTableCount]->unicode[k + 26] = QChar(QLatin1Char('a').unicode() + k); } for (ushort k = 0; k < 10; ++k) lookupTable[lookupTableCount]->unicode[k + 52] = QChar(QLatin1Char('0').unicode() + k); j = lookupTableCount; ++lookupTableCount; } /// Add the letter as of the current row in encoderLaTeXEscapedCharacters /// into Unicode char array in the current modifier's row in the lookup table. 
int pos = -1; if ((pos = asciiLetterOrDigitToPos(encoderLaTeXEscapedCharacter.letter)) >= 0) lookupTable[j]->unicode[pos] = QChar(encoderLaTeXEscapedCharacter.unicode); else qCWarning(LOG_KBIBTEX_IO) << "Cannot handle letter " << encoderLaTeXEscapedCharacter.letter; } } EncoderLaTeX::~EncoderLaTeX() { /// Clean-up memory for (int i = lookupTableNumModifiers - 1; i >= 0; --i) if (lookupTable[i] != nullptr) delete lookupTable[i]; } QString EncoderLaTeX::decode(const QString &input) const { const int len = input.length(); QString output; output.reserve(len); bool inMathMode = false; int cachedAsciiLetterOrDigitToPos = -1; /// Go through input char by char for (int i = 0; i < len; ++i) { /** * Repeatedly check if input data contains a verbatim command * like \url{...}, copy it to output, and update i to point * to the next character after the verbatim command. */ while (testAndCopyVerbatimCommands(input, i, output)); if (i >= len) break; /// Fetch current input char const QChar c = input[i]; if (c == QLatin1Char('{')) { /// First case: An opening curly bracket, /// which is harmless (see else case), unless ... if (i < len - 3 && input[i + 1] == QLatin1Char('\\')) { /// ... 
it continues with a backslash /// Next, check if there follows a modifier after the backslash /// For example an quotation mark as used in {\"a} const int lookupTablePos = modifierInLookupTable(input[i + 2].toLatin1()); /// Check for spaces between modifier and character, for example /// like {\H o} int skipSpaces = 0; while (i + 3 + skipSpaces < len && input[i + 3 + skipSpaces] == QLatin1Char(' ') && skipSpaces < 16) ++skipSpaces; + bool found = false; if (lookupTablePos >= 0 && i + skipSpaces < len - 4 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 3 + skipSpaces])) >= 0 && input[i + 4 + skipSpaces] == QLatin1Char('}')) { /// If we found a modifier which is followed by /// a letter followed by a closing curly bracket, /// we are looking at something like {\"A} /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; - if (unicodeLetter.unicode() < 127) { - /// This combination of modifier and letter is not known, - /// so try to preserve it - output.append(input.midRef(i, 5 + skipSpaces)); - qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 5 + skipSpaces); - } else + if (unicodeLetter.unicode() >= 127) { output.append(unicodeLetter); - /// Step over those additional characters - i += 4 + skipSpaces; + /// Step over those additional characters + i += 4 + skipSpaces; + found = true; + } + /// Don't print any warnings yet, as this if-case may got triggered by e.g. \mu + /// ('m' is a potential modifier, yet \mu should be recognized as Greek letter later) } else if (lookupTablePos >= 0 && i + skipSpaces < len - 5 && input[i + 3 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 4 + skipSpaces]) && input[i + 5 + skipSpaces] == QLatin1Char('}')) { /// This is the case for {\'\i} or alike. 
- bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 4 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 2]) { output.append(QChar(dotlessIJCharacter.unicode)); - i += 5 + skipSpaces; found = true; break; } - if (!found) + if (!found) { + /// This combination of modifier and letter is not known, + /// so try to preserve it + output.append(input.midRef(i, 6 + skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 2] << "BACKSLASH" << input[i + 4 + skipSpaces]; + } + /// Step over those additional characters + i += 5 + skipSpaces; + found = true; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 6 && input[i + 3 + skipSpaces] == QLatin1Char('{') && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 4 + skipSpaces])) >= 0 && input[i + 5 + skipSpaces] == QLatin1Char('}') && input[i + 6 + skipSpaces] == QLatin1Char('}')) { /// If we found a modifier which is followed by /// an opening curly bracket followed by a letter /// followed by two closing curly brackets, /// we are looking at something like {\"{A}} /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 7 + skipSpaces)); + qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 7 + skipSpaces); } else output.append(unicodeLetter); /// Step over those additional characters i += 6 + skipSpaces; + found = true; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 7 && input[i + 3 + skipSpaces] == QLatin1Char('{') && input[i + 4 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 5 + skipSpaces]) && input[i + 6 + 
skipSpaces] == QLatin1Char('}') && input[i + 7 + skipSpaces] == QLatin1Char('}')) { /// This is the case for {\'{\i}} or alike. - bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 5 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 2]) { output.append(QChar(dotlessIJCharacter.unicode)); - i += 7 + skipSpaces; found = true; break; } - if (!found) + if (!found) { + /// This combination of modifier and letter is not known, + /// so try to preserve it + output.append(input.midRef(i, 8 + skipSpaces)); + qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10); qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 2] << "BACKSLASH {" << input[i + 5 + skipSpaces] << "}"; - } else { - /// Now, the case of something like {\AA} is left - /// to check for + } + /// Step over those additional characters + i += 7 + skipSpaces; + found = true; + } + + if (!found) { + /// Now, either some two-letter command like {\AA} or {\mu} is left + /// to check for or there is completely unsuppored command sequence, + /// but which then should be kept unmodified const QString alpha = readAlphaCharacters(input, i + 2); int nextPosAfterAlpha = i + 2 + alpha.size(); if (nextPosAfterAlpha < input.length() && input[nextPosAfterAlpha] == QLatin1Char('}')) { - /// We are dealing actually with a string like {\AA} - /// Check which command it is, - /// insert corresponding Unicode character - bool foundCommand = false; + /// We may deal with a string like {\AA} or {\mu} + /// Check which command it is, then insert corresponding Unicode character + found = false; for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) { if (encoderLaTeXCharacterCommand.command == alpha) { output.append(QChar(encoderLaTeXCharacterCommand.unicode)); - foundCommand = true; + found = true; break; } } /// Check if a math command has been read, /// like \subset /// 
(automatically skipped if command was found above) - for (const MathCommand &mathCommand : mathCommands) { - if (mathCommand.command == alpha) { - if (output.endsWith(QStringLiteral("\\ensuremath"))) { - /// Remove "\ensuremath" right before this math command, - /// it will be re-inserted when exporting/saving the document - output = output.left(output.length() - 11); + if (!found) + for (const MathCommand &mathCommand : mathCommands) { + if (mathCommand.command == alpha) { + output.append(QChar(mathCommand.unicode)); + found = true; + break; } - output.append(QChar(mathCommand.unicode)); - foundCommand = true; - break; } - } - if (foundCommand) - i = nextPosAfterAlpha; - else { - /// Dealing with a string line {\noopsort} + if (!found) { + /// Dealing with a string like {\noopsort} /// (see BibTeX documentation where this gets explained) - output.append(c); + output.append(input.midRef(i, 3 + alpha.size())); } + i = nextPosAfterAlpha; } else { /// Could be something like {\tt filename.txt} /// Keep it as it is output.append(c); } } } else { /// Nothing special, copy input char to output output.append(c); } } else if (c == QLatin1Char('\\') && i < len - 1) { /// Second case: A backslash as in \"o /// Sometimes such command are closed with just {}, /// so remember if to check for that bool checkForExtraCurlyAtEnd = false; /// Check if there follows a modifier after the backslash /// For example an quotation mark as used in \"a const int lookupTablePos = modifierInLookupTable(input[i + 1]); /// Check for spaces between modifier and character, for example /// like \H o int skipSpaces = 0; while (i + 2 + skipSpaces < len && input[i + 2 + skipSpaces] == QLatin1Char(' ') && skipSpaces < 16) ++skipSpaces; + bool found = false; if (lookupTablePos >= 0 && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0 && (i + skipSpaces == len - 3 || input[i + 1] == QLatin1Char('"') || input[i + 1] == QLatin1Char('\'') 
|| input[i + 1] == QLatin1Char('`') || input[i + 1] == QLatin1Char('='))) { // TODO more special cases? /// We found a special modifier which is followed by /// a letter followed by normal text without any /// delimiter, so we are looking at something like /// \"u inside Kr\"uger /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; - if (unicodeLetter.unicode() < 127) { - /// This combination of modifier and letter is not known, - /// so try to preserve it - output.append(input.midRef(i, 3 + skipSpaces)); - qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 3 + skipSpaces); - } else + if (unicodeLetter.unicode() > 127) { output.append(unicodeLetter); - /// Step over those additional characters - i += 2 + skipSpaces; - } else if (lookupTablePos >= 0 && i + skipSpaces <= len - 3 && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0 && (i + skipSpaces == len - 3 || input[i + 3 + skipSpaces] == QLatin1Char('}') || input[i + 3 + skipSpaces] == QLatin1Char('{') || input[i + 3 + skipSpaces] == QLatin1Char(' ') || input[i + 3 + skipSpaces] == QLatin1Char('\t') || input[i + 3 + skipSpaces] == QLatin1Char('\\') || input[i + 3 + skipSpaces] == QLatin1Char('\r') || input[i + 3 + skipSpaces] == QLatin1Char('\n'))) { + /// Step over those additional characters + i += 2 + skipSpaces; + found = true; + } + /// Don't print any warnings yet, as this if-case may got triggered by e.g. 
\mu + /// ('m' is a potential modifier, yet \mu should be recognized as Greek letter later) + } else if (lookupTablePos >= 0 && i + skipSpaces <= len - 3 && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 2 + skipSpaces])) >= 0 && (i + skipSpaces == len - 3 || input[i + 3 + skipSpaces] == QLatin1Char('}') || input[i + 3 + skipSpaces] == QLatin1Char('{') || input[i + 3 + skipSpaces] == QLatin1Char(' ') || input[i + 3 + skipSpaces] == QLatin1Char('\t') || input[i + 3 + skipSpaces] == QLatin1Char('\\') || input[i + 3 + skipSpaces] == QLatin1Char('\r') || input[i + 3 + skipSpaces] == QLatin1Char('\n'))) { /// We found a modifier which is followed by /// a letter followed by a command delimiter such /// as a whitespace, so we are looking at something - /// like \"u followed by a space + /// like \"u followed by a space or another delimiter /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; - if (unicodeLetter.unicode() < 127) { - /// This combination of modifier and letter is not known, - /// so try to preserve it - output.append(input.midRef(i, 3)); - qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 3); - } else + if (unicodeLetter.unicode() >= 127) { output.append(unicodeLetter); - /// Step over those additional characters - i += 2 + skipSpaces; - - /// Now, after this command, a whitespace may follow - /// which has to get "eaten" as it acts as a command - /// delimiter - if (input[i + 1] == QLatin1Char(' ') || input[i + 1] == QLatin1Char('\r') || input[i + 1] == QLatin1Char('\n')) - ++i; - else { - /// If no whitespace follows, still - /// check for extra curly brackets - checkForExtraCurlyAtEnd = true; + /// Step over those additional characters + i += 2 + skipSpaces; + found = true; + + if (input[i + 1] != QLatin1Char(' ') && input[i + 1] != QLatin1Char('\r') && input[i + 1] != 
QLatin1Char('\n')) { + /// If no whitespace follows, still + /// check for extra curly brackets + checkForExtraCurlyAtEnd = true; + } } + /// Don't print any warnings yet, as this if-case may got triggered by e.g. \mu + /// ('m' is a potential modifier, yet \mu should be recognized as Greek letter later) } else if (lookupTablePos >= 0 && i + skipSpaces < len - 4 && input[i + 2 + skipSpaces] == QLatin1Char('{') && (cachedAsciiLetterOrDigitToPos = asciiLetterOrDigitToPos(input[i + 3 + skipSpaces])) >= 0 && input[i + 4 + skipSpaces] == QLatin1Char('}')) { /// We found a modifier which is followed by an opening /// curly bracket followed a letter followed by a closing /// curly bracket, so we are looking at something /// like \"{u} /// Use lookup table to see what Unicode char this /// represents const QChar unicodeLetter = lookupTable[lookupTablePos]->unicode[cachedAsciiLetterOrDigitToPos]; if (unicodeLetter.unicode() < 127) { /// This combination of modifier and letter is not known, /// so try to preserve it output.append(input.midRef(i, 5 + skipSpaces)); + qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to translate this into Unicode: " << input.mid(i, 5 + skipSpaces); } else output.append(unicodeLetter); /// Step over those additional characters i += 4 + skipSpaces; + found = true; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 3 && input[i + 2 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 3 + skipSpaces])) { /// This is the case for \'\i or alike. 
- bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 3 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 1]) { output.append(QChar(dotlessIJCharacter.unicode)); - i += 3 + skipSpaces; found = true; break; } - if (!found) + if (!found) { + /// This combination of modifier and letter is not known, + /// so try to preserve it + output.append(input.midRef(i, 4 + skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 1] << "BACKSLASH" << input[i + 3 + skipSpaces]; + } + /// Step over those additional characters + i += 3 + skipSpaces; + found = true; } else if (lookupTablePos >= 0 && i + skipSpaces < len - 5 && input[i + 2 + skipSpaces] == QLatin1Char('{') && input[i + 3 + skipSpaces] == QLatin1Char('\\') && isIJ(input[i + 4 + skipSpaces]) && input[i + 5 + skipSpaces] == QLatin1Char('}')) { /// This is the case for \'{\i} or alike. - bool found = false; for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (dotlessIJCharacter.letter == input[i + 4 + skipSpaces] && dotlessIJCharacter.modifier == input[i + 1]) { output.append(QChar(dotlessIJCharacter.unicode)); - i += 5 + skipSpaces; found = true; break; } - if (!found) + if (!found) { + /// This combination of modifier and letter is not known, + /// so try to preserve it + output.append(input.midRef(i, 6 + skipSpaces)); qCWarning(LOG_KBIBTEX_IO) << "Cannot interpret BACKSLASH" << input[i + 1] << "BACKSLASH {" << input[i + 4 + skipSpaces] << "}"; - } else if (i < len - 1) { + } + /// Step over those additional characters + i += 5 + skipSpaces; + found = true; + } + + if (!found && i < len - 1) { /// Now, the case of something like \AA is left /// to check for const QString alpha = readAlphaCharacters(input, i + 1); - int nextPosAfterAlpha = i + 1 + alpha.size(); + int nextPosAfterAlpha = i + alpha.size(); if (alpha.size() >= 1 && alpha.at(0).isLetter()) { /// We are dealing 
actually with a string like \AA or \o /// Check which command it is, /// insert corresponding Unicode character - bool foundCommand = false; for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) { if (encoderLaTeXCharacterCommand.command == alpha) { output.append(QChar(encoderLaTeXCharacterCommand.unicode)); - foundCommand = true; + found = true; break; } } - if (foundCommand) { + /// Check if a math command has been read, + /// like \subset + /// (automatically skipped if command was found above) + if (!found) + for (const MathCommand &mathCommand : mathCommands) { + if (mathCommand.command == alpha) { + output.append(QChar(mathCommand.unicode)); + found = true; + break; + } + } + + if (found) { /// Now, after a command, a whitespace may follow /// which has to get "eaten" as it acts as a command /// delimiter - if (nextPosAfterAlpha < input.length() && (input[nextPosAfterAlpha] == QLatin1Char(' ') || input[nextPosAfterAlpha] == QLatin1Char('\r') || input[nextPosAfterAlpha] == QLatin1Char('\n'))) + if (nextPosAfterAlpha + 1 < input.length() && (input[nextPosAfterAlpha + 1] == QLatin1Char(' ') || input[nextPosAfterAlpha + 1] == QLatin1Char('\r') || input[nextPosAfterAlpha + 1] == QLatin1Char('\n'))) ++nextPosAfterAlpha; else { /// If no whitespace follows, still /// check for extra curly brackets checkForExtraCurlyAtEnd = true; } - i = nextPosAfterAlpha - 1; } else { /// No command found? 
Just copy input char to output - output.append(c); + output.append(input.midRef(i, 1 + alpha.size())); } + i = nextPosAfterAlpha; } else { /// Maybe we are dealing with a string like \& or \_ /// Check which command it is - bool foundCommand = false; + found = false; for (const QChar &encoderLaTeXProtectedSymbol : encoderLaTeXProtectedSymbols) if (encoderLaTeXProtectedSymbol == input[i + 1]) { output.append(encoderLaTeXProtectedSymbol); - foundCommand = true; + found = true; break; } - if (!foundCommand && !inMathMode) + if (!found && !inMathMode) for (const QChar &encoderLaTeXProtectedTextOnlySymbol : encoderLaTeXProtectedTextOnlySymbols) if (encoderLaTeXProtectedTextOnlySymbol == input[i + 1]) { output.append(encoderLaTeXProtectedTextOnlySymbol); - foundCommand = true; + found = true; break; } /// If command has been found, nothing has to be done /// except for hopping over this backslash - if (foundCommand) + if (found) ++i; else if (i < len - 1 && input[i + 1] == QChar(0x002c /* comma */)) { /// Found a thin space: \, /// Replacing Latex-like thin space with Unicode thin space output.append(QChar(0x2009)); - // foundCommand = true; ///< only necessary if more tests will follow in the future + // found = true; ///< only necessary if more tests will follow in the future ++i; } else { /// Nothing special, copy input char to output output.append(c); } } - } else { + } else if (!found) { /// Nothing special, copy input char to output output.append(c); } /// Finally, check if there may be extra curly brackets /// like {} and hop over them if (checkForExtraCurlyAtEnd && i < len - 2 && input[i + 1] == QLatin1Char('{') && input[i + 2] == QLatin1Char('}')) i += 2; } else { /// So far, no opening curly bracket and no backslash /// May still be a symbol sequence like --- bool isSymbolSequence = false; /// Go through all known symbol sequnces for (const EncoderLaTeXSymbolSequence &encoderLaTeXSymbolSequence : encoderLaTeXSymbolSequences) { /// First, check if read input 
character matches beginning of symbol sequence /// and input buffer as enough characters left to potentially contain /// symbol sequence const int latexLen = encoderLaTeXSymbolSequence.latex.length(); if ((encoderLaTeXSymbolSequence.direction & DirectionCommandToUnicode) && encoderLaTeXSymbolSequence.latex[0] == c && i <= len - latexLen) { /// Now actually check if symbol sequence is in input buffer isSymbolSequence = true; for (int p = 1; isSymbolSequence && p < latexLen; ++p) isSymbolSequence &= encoderLaTeXSymbolSequence.latex[p] == input[i + p]; if (isSymbolSequence) { /// Ok, found sequence: insert Unicode character in output /// and hop over sequence in input buffer output.append(QChar(encoderLaTeXSymbolSequence.unicode)); i += encoderLaTeXSymbolSequence.latex.length() - 1; break; } } } if (!isSymbolSequence) { /// No symbol sequence found, so just copy input to output output.append(c); /// Still, check if input character is a dollar sign /// without a preceding backslash, means toggling between /// text mode and math mode if (c == QLatin1Char('$') && (i == 0 || input[i - 1] != QLatin1Char('\\'))) inMathMode = !inMathMode; } } } output.squeeze(); return output; } bool EncoderLaTeX::testAndCopyVerbatimCommands(const QString &input, int &pos, QString &output) const { int copyBytesCount = 0; int openedClosedCurlyBrackets = 0; /// check for \url if (pos < input.length() - 6 && input.mid(pos, 5) == QStringLiteral("\\url{")) { copyBytesCount = 5; openedClosedCurlyBrackets = 1; } if (copyBytesCount > 0) { while (openedClosedCurlyBrackets > 0 && pos + copyBytesCount < input.length()) { ++copyBytesCount; if (input[pos + copyBytesCount] == QLatin1Char('{') && input[pos + copyBytesCount - 1] != QLatin1Char('\\')) ++openedClosedCurlyBrackets; else if (input[pos + copyBytesCount] == QLatin1Char('}') && input[pos + copyBytesCount - 1] != QLatin1Char('\\')) --openedClosedCurlyBrackets; } output.append(input.midRef(pos, copyBytesCount)); pos += copyBytesCount; } return 
copyBytesCount > 0; } QString EncoderLaTeX::encode(const QString &ninput, const TargetEncoding targetEncoding) const { /// Perform Canonical Decomposition followed by Canonical Composition const QString input = ninput.normalized(QString::NormalizationForm_C); int len = input.length(); QString output; output.reserve(len); bool inMathMode = false; /// Go through input char by char for (int i = 0; i < len; ++i) { /** * Repeatedly check if input data contains a verbatim command * like \url{...}, append it to output, and update i to point * to the next character after the verbatim command. */ while (testAndCopyVerbatimCommands(input, i, output)); if (i >= len) break; const QChar c = input[i]; if (targetEncoding == TargetEncoding::ASCII && c.unicode() > 127) { /// If current char is outside ASCII boundaries ... bool found = false; /// Handle special cases of i without a dot (\i) for (const DotlessIJCharacter &dotlessIJCharacter : dotlessIJCharacters) if (c.unicode() == dotlessIJCharacter.unicode && (dotlessIJCharacter.direction & DirectionUnicodeToCommand)) { output.append(QString(QStringLiteral("{\\%1\\%2}")).arg(dotlessIJCharacter.modifier, dotlessIJCharacter.letter)); found = true; break; } if (!found) { /// ... test if there is a symbol sequence like --- /// to encode it for (const EncoderLaTeXSymbolSequence &encoderLaTeXSymbolSequence : encoderLaTeXSymbolSequences) if (encoderLaTeXSymbolSequence.unicode == c.unicode() && (encoderLaTeXSymbolSequence.direction & DirectionUnicodeToCommand)) { for (int l = 0; l < encoderLaTeXSymbolSequence.latex.length(); ++l) output.append(encoderLaTeXSymbolSequence.latex[l]); found = true; break; } } if (!found) { /// Ok, no symbol sequence. 
Let's test character /// commands like \ss for (const EncoderLaTeXCharacterCommand &encoderLaTeXCharacterCommand : encoderLaTeXCharacterCommands) if (encoderLaTeXCharacterCommand.unicode == c.unicode() && (encoderLaTeXCharacterCommand.direction & DirectionUnicodeToCommand)) { output.append(QString(QStringLiteral("{\\%1}")).arg(encoderLaTeXCharacterCommand.command)); found = true; break; } } if (!found) { /// Ok, neither a character command. Let's test /// escaped characters with modifiers like \"a for (const EncoderLaTeXEscapedCharacter &encoderLaTeXEscapedCharacter : encoderLaTeXEscapedCharacters) if (encoderLaTeXEscapedCharacter.unicode == c.unicode() && (encoderLaTeXEscapedCharacter.direction & DirectionUnicodeToCommand)) { const QString formatString = isAsciiLetter(encoderLaTeXEscapedCharacter.modifier) ? QStringLiteral("{\\%1 %2}") : QStringLiteral("{\\%1%2}"); output.append(formatString.arg(encoderLaTeXEscapedCharacter.modifier).arg(encoderLaTeXEscapedCharacter.letter)); found = true; break; } } if (!found) { /// Ok, test for math commands for (const MathCommand &mathCommand : mathCommands) if (mathCommand.unicode == c.unicode() && (mathCommand.direction & DirectionUnicodeToCommand)) { if (inMathMode) output.append(QString(QStringLiteral("\\%1{}")).arg(mathCommand.command)); else output.append(QString(QStringLiteral("\\ensuremath{\\%1}")).arg(mathCommand.command)); found = true; break; } } if (!found && c.unicode() == 0x2009) { /// Thin space output.append(QStringLiteral("\\,")); found = true; } if (!found) { + qCDebug(LOG_KBIBTEX_IO) << input.mid(qMax(0, i - 5), 10); qCWarning(LOG_KBIBTEX_IO) << "Don't know how to encode Unicode char" << QString(QStringLiteral("0x%1")).arg(c.unicode(), 4, 16, QLatin1Char('0')); output.append(c); } } else { /// Current character is normal ASCII /// and targetEncoding was set to accept only ASCII characters /// -- or -- targetEncoding was set to accept UTF-8 characters /// Still, some characters have special meaning /// in TeX 
and have to be preceded with a backslash bool found = false; for (const QChar &encoderLaTeXProtectedSymbol : encoderLaTeXProtectedSymbols) if (encoderLaTeXProtectedSymbol == c) { output.append(QLatin1Char('\\')); found = true; break; } if (!found && !inMathMode) for (const QChar &encoderLaTeXProtectedTextOnlySymbol : encoderLaTeXProtectedTextOnlySymbols) if (encoderLaTeXProtectedTextOnlySymbol == c) { output.append(QLatin1Char('\\')); break; } /// Dump character to output output.append(c); /// Finally, check if input character is a dollar sign /// without a preceding backslash, means toggling between /// text mode and math mode if (c == QLatin1Char('$') && (i == 0 || input[i - 1] != QLatin1Char('\\'))) inMathMode = !inMathMode; } } output.squeeze(); return output; } int EncoderLaTeX::modifierInLookupTable(const QChar modifier) const { for (int m = 0; m < lookupTableNumModifiers && lookupTable[m] != nullptr; ++m) if (lookupTable[m]->modifier == modifier) return m; return -1; } QString EncoderLaTeX::readAlphaCharacters(const QString &base, int startFrom) const { const int len = base.size(); for (int j = startFrom; j < len; ++j) { if (!isAsciiLetter(base[j])) return base.mid(startFrom, j - startFrom); } return base.mid(startFrom); } const EncoderLaTeX &EncoderLaTeX::instance() { static const EncoderLaTeX self; return self; } diff --git a/src/test/kbibtexiotest.cpp b/src/test/kbibtexiotest.cpp index 2a943070..e6edb62d 100644 --- a/src/test/kbibtexiotest.cpp +++ b/src/test/kbibtexiotest.cpp @@ -1,724 +1,733 @@ /*************************************************************************** - * Copyright (C) 2004-2019 by Thomas Fischer * + * Copyright (C) 2004-2020 by Thomas Fischer * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, see . * ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "logging_test.h" Q_DECLARE_METATYPE(QMimeType) Q_DECLARE_METATYPE(QSharedPointer) class KBibTeXIOTest : public QObject { Q_OBJECT private slots: void initTestCase(); void encoderConvertToPlainAscii_data(); void encoderConvertToPlainAscii(); void encoderXMLdecode_data(); void encoderXMLdecode(); void encoderXMLencode_data(); void encoderXMLencode(); void encoderLaTeXdecode_data(); void encoderLaTeXdecode(); void encoderLaTeXencode_data(); void encoderLaTeXencode(); void fileImporterSplitName_data(); void fileImporterSplitName(); void fileInfoMimeTypeForUrl_data(); void fileInfoMimeTypeForUrl(); void fileInfoUrlsInText_data(); void fileInfoUrlsInText(); QVector > fileImporterExporterTestCases(); void fileExporterXMLsave_data(); void fileExporterXMLsave(); void fileExporterXSLTstandardSaveFile_data(); void fileExporterXSLTstandardSaveFile(); void fileExporterXSLTstandardSaveElement_data(); void fileExporterXSLTstandardSaveElement(); void fileExporterRISsave_data(); void fileExporterRISsave(); void fileExporterBibTeXsave_data(); void fileExporterBibTeXsave(); void fileImporterRISload_data(); void fileImporterRISload(); void fileImporterBibTeXload_data(); void fileImporterBibTeXload(); void protectiveCasingEntryGeneratedOnTheFly(); void protectiveCasingEntryFromData(); void partialBibTeXInput_data(); void partialBibTeXInput(); void partialRISInput_data(); void 
partialRISInput(); private: }; void KBibTeXIOTest::encoderConvertToPlainAscii_data() { QTest::addColumn("unicodestring"); /// Depending on the chosen implementation for Encoder::instance().convertToPlainAscii(), /// the ASCII variant may slightly differ (both alternatives are considered valid). /// If both implementations produce the same ASCII output, 'asciialternative2' is /// to be set to be empty. QTest::addColumn("asciialternative1"); QTest::addColumn("asciialternative2"); QTest::newRow("Just 'A'") << QString(QChar(0x00c0)) + QChar(0x00c2) + QChar(0x00c5) << QStringLiteral("AAA") << QString(); QTest::newRow("Just ASCII letters and numbers") << QStringLiteral("qwertyuiopASDFGHJKLzxcvbnm1234567890") << QStringLiteral("qwertyuiopASDFGHJKLzxcvbnm1234567890") << QString(); QTest::newRow("Latin text") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QString(); QTest::newRow("ASCII low and high bytes") << QStringLiteral("\x00\x01\x09\x0a\x10\x11\x19\x1a\x1f\x20\x7e\x7f") << QStringLiteral(" ~") << QString(); QTest::newRow("European Scripts/Latin-1 Supplement") << QString::fromUtf8("\xc3\x80\xc3\x82\xc3\x84\xc3\x92\xc3\x94\xc3\x96\xc3\xac\xc3\xad\xc3\xae\xc3\xaf") << QStringLiteral("AAAOOOiiii") << QStringLiteral("AAAEOOOEiiii"); QTest::newRow("European Scripts/Latin Extended-A") << QString::fromUtf8("\xc4\x8a\xc4\x8b\xc4\xae\xc4\xaf\xc5\x9c\xc5\x9d\xc5\xbb\xc5\xbc") << QStringLiteral("CcIiSsZz") << QString(); QTest::newRow("European Scripts/Latin Extended-B") << QString::fromUtf8("\xc7\x8a\xc7\x8b\xc7\x8c") << QStringLiteral("NJNjnj") << QString(); QTest::newRow("European Scripts/Latin Extended Additional") << 
QString::fromUtf8("\xe1\xb8\xbe\xe1\xb8\xbf\xe1\xb9\xa4\xe1\xb9\xa5\xe1\xbb\xae\xe1\xbb\xaf") << QStringLiteral("MmSsUu") << QString(); QTest::newRow("European Scripts/Cyrillic") << QString::fromUtf8("\xd0\x90\xd0\x9e\xd0\x9f") << QStringLiteral("AOP") << QString(); QTest::newRow("European Scripts/Greek and Coptic") << QString::fromUtf8("\xce\xba\xce\xb1\xce\xa4\xcf\xba\xce\x9d") << QStringLiteral("kaTSN") << QStringLiteral("kappaalphaTauSanNu"); QTest::newRow("East Asian Scripts/Katakana") << QString::fromUtf8("\xe3\x82\xb7\xe3\x83\x84") << QStringLiteral("shitsu") << QStringLiteral("situ"); QTest::newRow("East Asian Scripts/Hangul Syllables") << QString::fromUtf8("\xea\xb9\x80\xec\xa0\x95\xec\x9d\x80") << QStringLiteral("gimjeongeun") << QStringLiteral("gimjeong-eun"); QTest::newRow("Non-BMP characters (stay unchanged)") << QString::fromUtf8(/* U+10437 */ "\xf0\x90\x90\xb7" /* U+10E6D */ "\xf0\x90\xb9\xad" /* U+1D11E */ "\xf0\x9d\x84\x9e" /* U+10FFFF */ "") << QString::fromUtf8("\xf0\x90\x90\xb7\xf0\x90\xb9\xad\xf0\x9d\x84\x9e") << QString(); QTest::newRow("Base symbols followed by combining symbols") << QString::fromUtf8("123" /* COMBINING GRAVE ACCENT */ "A\xcc\x80" /* COMBINING DIAERESIS */ "A\xcc\x88" /* COMBINING LOW LINE */ "A\xcc\xb2" "123") << QStringLiteral("123AAA123") << QString(); } void KBibTeXIOTest::encoderConvertToPlainAscii() { QFETCH(QString, unicodestring); QFETCH(QString, asciialternative1); QFETCH(QString, asciialternative2); const QString converted = Encoder::instance().convertToPlainAscii(unicodestring); /// Depending on the chosen implementation for Encoder::instance().convertToPlainAscii(), /// the ASCII variant may slightly differ (both alternatives are considered valid). 
if (converted != asciialternative1 && converted != asciialternative2) qCWarning(LOG_KBIBTEX_TEST) << "converted=" << converted << " asciialternative1=" << asciialternative1 << " asciialternative2=" << asciialternative2; QVERIFY(converted == asciialternative1 || converted == asciialternative2); } void KBibTeXIOTest::encoderXMLdecode_data() { QTest::addColumn("xml"); QTest::addColumn("unicode"); QTest::newRow("Just ASCII") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur."); QTest::newRow("Quotation marks") << QStringLiteral("Caesar said: "Veni, vidi, vici"") << QStringLiteral("Caesar said: \"Veni, vidi, vici\""); QTest::newRow("Characters from EncoderXMLCharMapping") << QStringLiteral(""&<>") << QStringLiteral("\"\\&<>"); QTest::newRow("Characters from backslashSymbols") << QStringLiteral("&%_") << QStringLiteral("\\&\\%\\_"); for (int start = 0; start < 16; ++start) { QString xmlString, unicodeString; for (int offset = 1561; offset < 6791; offset += 621) { const ushort unicode = static_cast((start * 3671 + offset) & 0x7fff); xmlString += QStringLiteral("&#") + QString::number(unicode) + QStringLiteral(";"); unicodeString += QChar(unicode); } QTest::newRow(QString(QStringLiteral("Some arbitrary Unicode characters (%1): %2")).arg(start).arg(xmlString).toLatin1().constData()) << xmlString << unicodeString; } } void KBibTeXIOTest::encoderXMLdecode() { QFETCH(QString, xml); QFETCH(QString, unicode); QCOMPARE(EncoderXML::instance().decode(xml), unicode); } void KBibTeXIOTest::encoderXMLencode_data() { encoderXMLdecode_data(); } void KBibTeXIOTest::encoderXMLencode() { QFETCH(QString, xml); QFETCH(QString, unicode); QCOMPARE(EncoderXML::instance().encode(unicode, Encoder::TargetEncoding::ASCII), 
xml); } void KBibTeXIOTest::encoderLaTeXdecode_data() { QTest::addColumn("latex"); QTest::addColumn("unicode"); QTest::addColumn("alternativelatex"); QTest::newRow("Just ASCII") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QStringLiteral("Gallia est omnis divisa in partes tres, quarum unam incolunt Belgae, aliam Aquitani, tertiam qui ipsorum lingua Celtae, nostra Galli appellantur.") << QString(); - QTest::newRow("Dotless i and j characters") << QStringLiteral("{\\`\\i}{\\'\\i}{\\^\\i}{\\\"\\i}{\\~\\i}{\\=\\i}{\\u\\i}{\\k\\i}{\\^\\j}{\\v\\i}{\\v\\j}") << QString(QChar(0x00EC)) + QChar(0x00ED) + QChar(0x00EE) + QChar(0x00EF) + QChar(0x0129) + QChar(0x012B) + QChar(0x012D) + QChar(0x012F) + QChar(0x0135) + QChar(0x01D0) + QChar(0x01F0) << QString(); + QTest::newRow("Dotless i and j characters") << QStringLiteral("{\\`\\i}{\\`{\\i}}{\\'\\i}{\\^\\i}{\\\"\\i}{\\~\\i}{\\=\\i}{\\u\\i}{\\k\\i}{\\^\\j}{\\m\\i}{\\v\\i}{\\v\\j}\\m\\i") << QString(QChar(0x00EC)) + QChar(0x00EC) + QChar(0x00ED) + QChar(0x00EE) + QChar(0x00EF) + QChar(0x0129) + QChar(0x012B) + QChar(0x012D) + QChar(0x012F) + QChar(0x0135) + QStringLiteral("{\\m\\i}") + QChar(0x01D0) + QChar(0x01F0) + QStringLiteral("\\m\\i") << QStringLiteral("{\\`\\i}{\\`\\i}{\\'\\i}{\\^\\i}{\\\"\\i}{\\~\\i}{\\=\\i}{\\u\\i}{\\k\\i}{\\^\\j}{\\m\\i}{\\v\\i}{\\v\\j}\\m\\i"); QTest::newRow("\\l and \\ldots") << QStringLiteral("\\l\\ldots\\l\\ldots") << QString(QChar(0x0142)) + QChar(0x2026) + QChar(0x0142) + QChar(0x2026) << QStringLiteral("{\\l}{\\ldots}{\\l}{\\ldots}"); + QTest::newRow("Various two-letter commands (1)") << QStringLiteral("\\AA\\mu") << QString(QChar(0x00c5)) + QChar(0x03bc) << QStringLiteral("{\\AA}\\ensuremath{\\mu}"); + QTest::newRow("Various two-letter commands (2)") << QStringLiteral("{\\AA}{\\mu}") << QString(QChar(0x00c5)) + QChar(0x03bc) << QStringLiteral("{\\AA}\\ensuremath{\\mu}"); 
+ QTest::newRow("Various two-letter commands (3)") << QStringLiteral("\\AA \\mu ") << QString(QChar(0x00c5)) + QChar(0x03bc) << QStringLiteral("{\\AA}\\ensuremath{\\mu}"); + QTest::newRow("Inside curly brackets: modifier plus letter") << QStringLiteral("aa{\\\"A}bb{\\\"T}") << QStringLiteral("aa") + QChar(0x00c4) + QStringLiteral("bb{\\\"T}") << QString(); + QTest::newRow("Inside curly brackets: modifier plus, inside curly brackets, letter") << QStringLiteral("aa{\\\"{A}}bb{\\\"{T}}") << QStringLiteral("aa") + QChar(0x00c4) + QStringLiteral("bb{\\\"{T}}") << QStringLiteral("aa{\\\"A}bb{\\\"{T}}"); + QTest::newRow("Modifier plus letter") << QStringLiteral("\\\"A aa\\\"Abb\\\"T") << QChar(0x00c4) + QStringLiteral(" aa") + QChar(0x00c4) + QStringLiteral("bb\\\"T") << QStringLiteral("{\\\"A} aa{\\\"A}bb\\\"T"); + QTest::newRow("Modifier plus, inside curly brackets, letter") << QStringLiteral("\\\"{A} aa\\\"{A}bb\\\"{T}") << QChar(0x00c4) + QStringLiteral(" aa") + QChar(0x00c4) + QStringLiteral("bb\\\"{T}") << QStringLiteral("{\\\"A} aa{\\\"A}bb\\\"{T}"); + QTest::newRow("Single-letter commands") << QStringLiteral("\\,\\&\\_") << QChar(0x2009) + QStringLiteral("&_") << QString(); + QTest::newRow("\\noopsort{\\noopsort}") << QStringLiteral("\\noopsort{\\noopsort}") << QStringLiteral("\\noopsort{\\noopsort}") << QString(); } void KBibTeXIOTest::encoderLaTeXdecode() { QFETCH(QString, latex); QFETCH(QString, unicode); QCOMPARE(EncoderLaTeX::instance().decode(latex), unicode); } void KBibTeXIOTest::encoderLaTeXencode_data() { encoderLaTeXdecode_data(); } void KBibTeXIOTest::encoderLaTeXencode() { QFETCH(QString, latex); QFETCH(QString, unicode); QFETCH(QString, alternativelatex); const QString generatedLatex = EncoderLaTeX::instance().encode(unicode, Encoder::TargetEncoding::ASCII); if (generatedLatex != latex && !alternativelatex.isEmpty()) QCOMPARE(generatedLatex, alternativelatex); else QCOMPARE(generatedLatex, latex); } void KBibTeXIOTest::fileImporterSplitName_data() { 
QTest::addColumn("name"); QTest::addColumn("person"); QTest::newRow("Empty name") << QString() << new Person(QString(), QString(), QString()); QTest::newRow("PubMed style") << QStringLiteral("Jones A B C") << new Person(QStringLiteral("A B C"), QStringLiteral("Jones"), QString()); QTest::newRow("Just last name") << QStringLiteral("Dido") << new Person(QString(), QStringLiteral("Dido"), QString()); QTest::newRow("Name with 'von'") << QStringLiteral("Theodor von Sickel") << new Person(QStringLiteral("Theodor"), QStringLiteral("von Sickel"), QString()); QTest::newRow("Name with 'von', reversed") << QStringLiteral("von Sickel, Theodor") << new Person(QStringLiteral("Theodor"), QStringLiteral("von Sickel"), QString()); QTest::newRow("Name with 'van der'") << QStringLiteral("Adriaen van der Werff") << new Person(QStringLiteral("Adriaen"), QStringLiteral("van der Werff"), QString()); QTest::newRow("Name with 'van der', reversed") << QStringLiteral("van der Werff, Adriaen") << new Person(QStringLiteral("Adriaen"), QStringLiteral("van der Werff"), QString()); QTest::newRow("Name with suffix") << QStringLiteral("Anna Eleanor Roosevelt Jr.") << new Person(QStringLiteral("Anna Eleanor"), QStringLiteral("Roosevelt"), QStringLiteral("Jr.")); } void KBibTeXIOTest::fileImporterSplitName() { QFETCH(QString, name); QFETCH(Person *, person); Person *computedPerson = FileImporter::splitName(name); QCOMPARE(*computedPerson, *person); delete person; delete computedPerson; } void KBibTeXIOTest::fileInfoMimeTypeForUrl_data() { QTest::addColumn("url"); QTest::addColumn("mimetype"); static const QMimeDatabase db; QTest::newRow("Invalid URL") << QUrl() << QMimeType(); QTest::newRow("Generic URL") << QUrl(QStringLiteral("https://www.example.com")) << db.mimeTypeForName(QStringLiteral("text/html")); QTest::newRow("Generic local file") << QUrl(QStringLiteral("/usr/bin/who")) << db.mimeTypeForName(QStringLiteral("application/octet-stream")); QTest::newRow("Generic Samba URL") << 
QUrl(QStringLiteral("smb://fileserver.local/file")) << db.mimeTypeForName(QStringLiteral("application/octet-stream")); QTest::newRow("URL to .bib file") << QUrl(QStringLiteral("https://www.example.com/references.bib")) << db.mimeTypeForName(QStringLiteral("text/x-bibtex")); QTest::newRow("Local .bib file") << QUrl(QStringLiteral("/home/user/references.bib")) << db.mimeTypeForName(QStringLiteral("text/x-bibtex")); QTest::newRow("URL to .pdf file") << QUrl(QStringLiteral("https://www.example.com/references.pdf")) << db.mimeTypeForName(QStringLiteral("application/pdf")); QTest::newRow("Local .pdf file") << QUrl(QStringLiteral("/home/user/references.pdf")) << db.mimeTypeForName(QStringLiteral("application/pdf")); } void KBibTeXIOTest::fileInfoMimeTypeForUrl() { QFETCH(QUrl, url); QFETCH(QMimeType, mimetype); QCOMPARE(FileInfo::mimeTypeForUrl(url), mimetype); } void KBibTeXIOTest::fileInfoUrlsInText_data() { QTest::addColumn("text"); QTest::addColumn>("expectedUrls"); QTest::newRow("Empty text") << QString() << QSet(); QTest::newRow("Lore ipsum with DOI (without URL)") << QStringLiteral("Lore ipsum 10.1000/38-abc Lore ipsum") << QSet {QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc"))}; QTest::newRow("Lore ipsum with DOI (with HTTP URL)") << QStringLiteral("Lore ipsum http://doi.example.org/10.1000/38-abc Lore ipsum") << QSet {QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc"))}; QTest::newRow("Lore ipsum with DOI (with HTTPS URL)") << QStringLiteral("Lore ipsum https://doi.example.org/10.1000/42-XYZ Lore ipsum") << QSet {QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/42-XYZ"))}; QTest::newRow("URLs and DOI (without URL), all semicolon-separated") << QStringLiteral("http://www.example.com;10.1000/38-abc ;\nhttps://www.example.com") << QSet {QUrl(QStringLiteral("http://www.example.com")), QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc")), QUrl(QStringLiteral("https://www.example.com"))}; QTest::newRow("URLs and DOI (with URL), 
all semicolon-separated") << QStringLiteral("http://www.example.com\n; 10.1000/38-abc;https://www.example.com") << QSet {QUrl(QStringLiteral("http://www.example.com")), QUrl(KBibTeX::doiUrlPrefix + QStringLiteral("10.1000/38-abc")), QUrl(QStringLiteral("https://www.example.com"))}; QTest::newRow("URLs with various separators") << QStringLiteral("http://www.example.com/def.pdf https://www.example.com\nhttp://download.example.com/abc") << QSet {QUrl(QStringLiteral("http://www.example.com/def.pdf")), QUrl(QStringLiteral("https://www.example.com")), QUrl(QStringLiteral("http://download.example.com/abc"))}; QTest::newRow("URLs with query strings and anchors") << QStringLiteral("http://www.example.com/def.pdf?a=3&b=1 https://www.example.com#1581584\nhttp://download.example.com/abc,7352,A#abc?gh=352&ghi=1254") << QSet {QUrl(QStringLiteral("http://www.example.com/def.pdf?a=3&b=1")), QUrl(QStringLiteral("https://www.example.com#1581584")), QUrl(QStringLiteral("http://download.example.com/abc,7352,A#abc?gh=352&ghi=1254"))}; } void KBibTeXIOTest::fileInfoUrlsInText() { QFETCH(QString, text); QFETCH(QSet, expectedUrls); QSet extractedUrls; FileInfo::urlsInText(text, FileInfo::TestExistence::No, QString(), extractedUrls); QCOMPARE(extractedUrls.count(), expectedUrls.count()); for (const QUrl &expectedUrl : const_cast &>(expectedUrls)) QCOMPARE(extractedUrls.contains(expectedUrl), true); } static const char *fileImporterExporterTestCases_Label_Empty_file = "Empty file"; static const char *fileImporterExporterTestCases_Label_Moby_Dick = "Moby Dick"; QVector > KBibTeXIOTest::fileImporterExporterTestCases() { /// The vector 'result' is static so that if this function is invoked multiple /// times, the vector will be initialized and filled with File objects only upon /// the function's first invocation. 
static QVector > result; if (result.isEmpty()) { /// Empty file without any entries result.append(QPair(fileImporterExporterTestCases_Label_Empty_file, new File())); /// File with single entry, inspired by 'Moby Dick' File *f1 = new File(); QSharedPointer entry1(new Entry(Entry::etArticle, QStringLiteral("the-whale-1851"))); f1->append(entry1); entry1->insert(Entry::ftTitle, Value() << QSharedPointer(new PlainText(QStringLiteral("{Call me Ishmael}")))); entry1->insert(Entry::ftAuthor, Value() << QSharedPointer<Person>(new Person(QStringLiteral("Herman"), QStringLiteral("Melville"))) << QSharedPointer<Person>(new Person(QStringLiteral("Moby"), QStringLiteral("Dick")))); entry1->insert(Entry::ftYear, Value() << QSharedPointer<PlainText>(new PlainText(QStringLiteral("1851")))); result.append(QPair<const char *, File *>(fileImporterExporterTestCases_Label_Moby_Dick, f1)); // TODO add more file objects to result vector /// Set various properties to guarantee reproducible results irrespective of local settings for (auto it = result.constBegin(); it != result.constEnd(); ++it) { File *file = it->second; file->setProperty(File::NameFormatting, Preferences::personNameFormatLastFirst); file->setProperty(File::ProtectCasing, static_cast<int>(Qt::Checked)); // TODO more file properties to set? 
} } return result; } void KBibTeXIOTest::fileExporterXMLsave_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QString>("xmlData"); static const QHash<const char *, QString> keyToXmlData { {fileImporterExporterTestCases_Label_Empty_file, QStringLiteral("<?xml version=\"1.0\" encoding=\"UTF-8\"?>|<!-- XML document written by KBibTeXIO as part of KBibTeX -->|<!-- https://userbase.kde.org/KBibTeX -->|<bibliography>|</bibliography>|")}, {fileImporterExporterTestCases_Label_Moby_Dick, QStringLiteral("<?xml version=\"1.0\" encoding=\"UTF-8\"?>|<!-- XML document written by KBibTeXIO as part of KBibTeX -->|<!-- https://userbase.kde.org/KBibTeX -->|<bibliography>| <entry id=\"the-whale-1851\" type=\"article\">| <authors>|<person><firstname>Herman</firstname><lastname>Melville</lastname></person> <person><firstname>Moby</firstname><lastname>Dick</lastname></person>| </authors>| <title><text>Call me Ishmael</text></title>| <year><text>1851</text></year>| </entry>|</bibliography>|")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToXmlData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToXmlData.value(it->first); } void KBibTeXIOTest::fileExporterXMLsave() { QFETCH(File *, bibTeXfile); QFETCH(QString, xmlData); FileExporterXML fileExporterXML(this); QStringList errorLog; const QString generatedData = fileExporterXML.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qCDebug(LOG_KBIBTEX_TEST) << logLine; QCOMPARE(generatedData, xmlData); } void KBibTeXIOTest::fileExporterXSLTstandardSaveFile_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QSet<QString>>("expectedFragments"); static const QHash<const char *, QSet<QString>> keyToXsltData { 
{fileImporterExporterTestCases_Label_Empty_file, {QStringLiteral("<title>Bibliography</title>"), QStringLiteral("<body/>")}}, {fileImporterExporterTestCases_Label_Moby_Dick, {QStringLiteral("<title>Bibliography</title>"), QStringLiteral(">1851<"), QStringLiteral(">Call me Ishmael<"), QStringLiteral("</b>"), QStringLiteral("</body>")}} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToXsltData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToXsltData.value(it->first); } void KBibTeXIOTest::fileExporterXSLTstandardSaveFile() { QFETCH(File *, bibTeXfile); QFETCH(QSet<QString>, expectedFragments); FileExporterXSLT fileExporterXSLT(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("kbibtex/standard.xsl")), this); QStringList errorLog; const QString generatedData = fileExporterXSLT.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qCDebug(LOG_KBIBTEX_TEST) << logLine; for (const QString &fragment : expectedFragments) QVERIFY2(generatedData.contains(fragment), QString(QStringLiteral("Fragment '%1' not found in generated XML data")).arg(fragment).toLatin1().constData()); } void KBibTeXIOTest::fileExporterXSLTstandardSaveElement_data() { QTest::addColumn<QSharedPointer<Element>>("element"); QTest::addColumn<QSet<QString>>("expectedFragments"); static const QHash<const char *, QSet<QString>> keyToXsltData { {fileImporterExporterTestCases_Label_Moby_Dick, {QStringLiteral("<title>Bibliography</title>"), QStringLiteral(">1851<"), QStringLiteral(">Call me Ishmael<"), QStringLiteral("</b>"), QStringLiteral("</body>")}} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != 
keyFileTable.constEnd(); ++it) if (!it->second->isEmpty() && keyToXsltData.contains(it->first)) QTest::newRow(it->first) << it->second->first() << keyToXsltData.value(it->first); } void KBibTeXIOTest::fileExporterXSLTstandardSaveElement() { QFETCH(QSharedPointer<Element>, element); QFETCH(QSet<QString>, expectedFragments); FileExporterXSLT fileExporterXSLT(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("kbibtex/standard.xsl")), this); QStringList errorLog; const QString generatedData = fileExporterXSLT.toString(element, nullptr, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qCDebug(LOG_KBIBTEX_TEST) << logLine; for (const QString &fragment : expectedFragments) QVERIFY2(generatedData.contains(fragment), QString(QStringLiteral("Fragment '%1' not found in generated XML data")).arg(fragment).toLatin1().constData()); } void KBibTeXIOTest::fileExporterRISsave_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QString>("risData"); static const QHash<const char *, QString> keyToRisData { {fileImporterExporterTestCases_Label_Empty_file, QString()}, {fileImporterExporterTestCases_Label_Moby_Dick, QStringLiteral("TY - JOUR|ID - the-whale-1851|AU - Melville, Herman|AU - Dick, Moby|TI - Call me Ishmael|PY - 1851///|ER - ||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToRisData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToRisData.value(it->first); } void KBibTeXIOTest::fileExporterRISsave() { QFETCH(File *, bibTeXfile); QFETCH(QString, risData); FileExporterRIS fileExporterRIS(this); QStringList errorLog; const QString generatedData = fileExporterRIS.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const 
QString &logLine : const_cast<const QStringList &>(errorLog)) qCDebug(LOG_KBIBTEX_TEST) << logLine; QCOMPARE(generatedData, risData); } void KBibTeXIOTest::fileExporterBibTeXsave_data() { QTest::addColumn<File *>("bibTeXfile"); QTest::addColumn<QString>("bibTeXdata"); static const QHash<const char *, QString> keyToBibTeXData { {fileImporterExporterTestCases_Label_Empty_file, QString()}, {fileImporterExporterTestCases_Label_Moby_Dick, QStringLiteral("@article{the-whale-1851,|\tauthor = {Melville, Herman and Dick, Moby},|\ttitle = {{Call me Ishmael}},|\tyear = {1851}|}||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToBibTeXData.contains(it->first)) QTest::newRow(it->first) << it->second << keyToBibTeXData.value(it->first); } void KBibTeXIOTest::fileExporterBibTeXsave() { QFETCH(File *, bibTeXfile); QFETCH(QString, bibTeXdata); FileExporterBibTeX fileExporterBibTeX(this); QStringList errorLog; const QString generatedData = fileExporterBibTeX.toString(bibTeXfile, &errorLog).remove(QLatin1Char('\r')).replace(QLatin1Char('\n'), QLatin1Char('|')); for (const QString &logLine : const_cast<const QStringList &>(errorLog)) qCDebug(LOG_KBIBTEX_TEST) << logLine; QCOMPARE(generatedData, bibTeXdata); } void KBibTeXIOTest::fileImporterRISload_data() { QTest::addColumn<QByteArray>("risData"); QTest::addColumn<File *>("bibTeXfile"); static const QHash<const char *, QString> keyToRisData { {fileImporterExporterTestCases_Label_Empty_file, QString()}, {fileImporterExporterTestCases_Label_Moby_Dick, QStringLiteral("TY - JOUR|ID - the-whale-1851|AU - Melville, Herman|AU - Dick, Moby|TI - Call me Ishmael|PY - 1851///|ER - ||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToRisData.contains(it->first)) 
QTest::newRow(it->first) << keyToRisData.value(it->first).toUtf8().replace('|', '\n') << it->second; } void KBibTeXIOTest::fileImporterRISload() { QFETCH(QByteArray, risData); QFETCH(File *, bibTeXfile); FileImporterRIS fileImporterRIS(this); fileImporterRIS.setProtectCasing(true); QBuffer buffer(&risData); buffer.open(QBuffer::ReadOnly); QScopedPointer<File> generatedFile(fileImporterRIS.load(&buffer)); QVERIFY(generatedFile->operator ==(*bibTeXfile)); } void KBibTeXIOTest::fileImporterBibTeXload_data() { QTest::addColumn<QByteArray>("bibTeXdata"); QTest::addColumn<File *>("bibTeXfile"); static const QHash<const char *, QString> keyToBibTeXData { {fileImporterExporterTestCases_Label_Empty_file, QString()}, {fileImporterExporterTestCases_Label_Moby_Dick, QStringLiteral("@article{the-whale-1851,|\tauthor = {Melville, Herman and Dick, Moby},|\ttitle = {{Call me Ishmael}},|\tyear = {1851}|}||")} }; static const QVector<QPair<const char *, File *> > keyFileTable = fileImporterExporterTestCases(); for (auto it = keyFileTable.constBegin(); it != keyFileTable.constEnd(); ++it) if (keyToBibTeXData.contains(it->first)) QTest::newRow(it->first) << keyToBibTeXData.value(it->first).toUtf8().replace('|', '\n') << it->second ; } void KBibTeXIOTest::fileImporterBibTeXload() { QFETCH(QByteArray, bibTeXdata); QFETCH(File *, bibTeXfile); FileImporterBibTeX fileImporterBibTeX(this); QBuffer buffer(&bibTeXdata); buffer.open(QBuffer::ReadOnly); QScopedPointer<File> generatedFile(fileImporterBibTeX.load(&buffer)); QVERIFY(generatedFile->operator ==(*bibTeXfile)); } void KBibTeXIOTest::protectiveCasingEntryGeneratedOnTheFly() { static const QString titleText = QStringLiteral("Some Title for a Journal Article"); static const QString singleCurleyBracketTitle = QStringLiteral("{") + titleText + QStringLiteral("}"); static const QString doubleCurleyBracketTitle = QStringLiteral("{{") + titleText + QStringLiteral("}}"); FileExporterBibTeX fileExporterBibTeX(this); /// Create a simple File 
object with a title field File file; file.setProperty(File::StringDelimiter, QStringLiteral("{}")); QSharedPointer<Entry> entry {new Entry(Entry::etArticle, QStringLiteral("SomeId"))}; Value titleValue = Value() << QSharedPointer<PlainText>(new PlainText(titleText)); entry->insert(Entry::ftTitle, titleValue); file.append(entry); file.setProperty(File::ProtectCasing, Qt::Checked); const QString textWithProtectiveCasing = fileExporterBibTeX.toString(&file); QVERIFY(textWithProtectiveCasing.contains(doubleCurleyBracketTitle)); file.setProperty(File::ProtectCasing, Qt::Unchecked); const QString textWithoutProtectiveCasing = fileExporterBibTeX.toString(&file); QVERIFY(textWithoutProtectiveCasing.contains(singleCurleyBracketTitle) && !textWithoutProtectiveCasing.contains(doubleCurleyBracketTitle)); } void KBibTeXIOTest::protectiveCasingEntryFromData() { static const QString titleText = QStringLiteral("Some Title for a Journal Article"); static const QString singleCurleyBracketTitle = QStringLiteral("{") + titleText + QStringLiteral("}"); static const QString doubleCurleyBracketTitle = QStringLiteral("{{") + titleText + QStringLiteral("}}"); static const QString bibTeXDataDoubleCurleyBracketTitle = QStringLiteral("@articl{doubleCurleyBracketTitle,\ntitle={{") + titleText + QStringLiteral("}}\n}\n"); static const QString bibTeXDataSingleCurleyBracketTitle = QStringLiteral("@articl{singleCurleyBracketTitle,\ntitle={") + titleText + QStringLiteral("}\n}\n"); FileImporterBibTeX fileImporterBibTeX(this); FileExporterBibTeX fileExporterBibTeX(this); QByteArray b1(bibTeXDataDoubleCurleyBracketTitle.toUtf8()); QBuffer bufferDoubleCurleyBracketTitle(&b1, this); QByteArray b2(bibTeXDataSingleCurleyBracketTitle.toUtf8()); QBuffer bufferSingleCurleyBracketTitle(&b2, this); bufferDoubleCurleyBracketTitle.open(QBuffer::ReadOnly); QScopedPointer<File> fileDoubleCurleyBracketTitle(fileImporterBibTeX.load(&bufferDoubleCurleyBracketTitle)); bufferDoubleCurleyBracketTitle.close(); 
fileDoubleCurleyBracketTitle->setProperty(File::StringDelimiter, QStringLiteral("{}")); bufferSingleCurleyBracketTitle.open(QBuffer::ReadOnly); QScopedPointer<File> fileSingleCurleyBracketTitle(fileImporterBibTeX.load(&bufferSingleCurleyBracketTitle)); bufferSingleCurleyBracketTitle.close(); fileSingleCurleyBracketTitle->setProperty(File::StringDelimiter, QStringLiteral("{}")); fileDoubleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::PartiallyChecked); const QString textDoubleCurleyBracketTitlePartialProtectiveCasing = fileExporterBibTeX.toString(fileDoubleCurleyBracketTitle.data()); QVERIFY(textDoubleCurleyBracketTitlePartialProtectiveCasing.contains(doubleCurleyBracketTitle)); fileSingleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::PartiallyChecked); const QString textSingleCurleyBracketTitlePartialProtectiveCasing = fileExporterBibTeX.toString(fileSingleCurleyBracketTitle.data()); QVERIFY(textSingleCurleyBracketTitlePartialProtectiveCasing.contains(singleCurleyBracketTitle) && !textSingleCurleyBracketTitlePartialProtectiveCasing.contains(doubleCurleyBracketTitle)); fileDoubleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Checked); const QString textDoubleCurleyBracketTitleWithProtectiveCasing = fileExporterBibTeX.toString(fileDoubleCurleyBracketTitle.data()); QVERIFY(textDoubleCurleyBracketTitleWithProtectiveCasing.contains(doubleCurleyBracketTitle)); fileSingleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Checked); const QString textSingleCurleyBracketTitleWithProtectiveCasing = fileExporterBibTeX.toString(fileSingleCurleyBracketTitle.data()); QVERIFY(textSingleCurleyBracketTitleWithProtectiveCasing.contains(doubleCurleyBracketTitle)); fileDoubleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Unchecked); const QString textDoubleCurleyBracketTitleWithoutProtectiveCasing = fileExporterBibTeX.toString(fileDoubleCurleyBracketTitle.data()); 
QVERIFY(textDoubleCurleyBracketTitleWithoutProtectiveCasing.contains(singleCurleyBracketTitle) && !textDoubleCurleyBracketTitleWithoutProtectiveCasing.contains(doubleCurleyBracketTitle)); fileSingleCurleyBracketTitle->setProperty(File::ProtectCasing, Qt::Unchecked); const QString textSingleCurleyBracketTitleWithoutProtectiveCasing = fileExporterBibTeX.toString(fileSingleCurleyBracketTitle.data()); QVERIFY(textSingleCurleyBracketTitleWithoutProtectiveCasing.contains(singleCurleyBracketTitle) && !textSingleCurleyBracketTitleWithoutProtectiveCasing.contains(doubleCurleyBracketTitle)); } void KBibTeXIOTest::partialBibTeXInput_data() { QTest::addColumn<bool>("isValid"); QTest::addColumn<QString>("text"); static const struct BibTeXDataTable { const char *label; const bool isValid; const QString text; } bibTeXDataTable[] = { {"Empty string", false, QString()}, {"Only 'at' sign", false, QStringLiteral("@")}, {"Only 'at' sign followed by element type", false, QStringLiteral("@entry")}, {"Only up to opening curly bracket", false, QStringLiteral("@entry{")}, {"Complete entry but without id", true, QStringLiteral("@entry{,\n title=\"{Abc Def}\",\n month = jan\n}")}, {"Entry without any data", true, QStringLiteral("@entry{}")}, {"Entry up to entry id, but no closing curly bracket", false, QStringLiteral("@entry{test")}, {"Entry up to entry id with opening curly bracket", false, QStringLiteral("@entry{test{")}, {"Entry up to entry id with closing curly bracket", true, QStringLiteral("@entry{test}")}, {"Entry up to comma after entry id", false, QStringLiteral("@entry{test,")}, {"Entry up to comma after entry id, followed by closing curly bracket", true, QStringLiteral("@entry{test,}")}, {"Entry up to first field's key, but nothing more, not even an assign char", false, QStringLiteral("@entry{test,title")}, {"Entry up to first field's key, but nothing more, just a closing curly bracket", false, QStringLiteral("@entry{test,title}")}, {"Entry up to first field's assign char, but 
nothing more", false, QStringLiteral("@entry{test,title=")}, {"Entry up to first field's assign char, but nothing more, just a closing curly bracket", false, QStringLiteral("@entry{test,title=}")}, {"Invalid combination of curly bracket in a field's value (1)", false, QStringLiteral("@entry{test,title={}")}, {"Invalid combination of curly bracket in a field's value (2)", false, QStringLiteral("@entry{test,title={{}}")}, {"Invalid combination of curly bracket in a field's value (3)", false, QStringLiteral("@entry{test,title={}{}")}, {"Invalid combination of curly bracket in a field's value (4)", false, QStringLiteral("@entry{test,title={}{}}")}, {"Complete entry with empty title (1)", true, QStringLiteral("@entry{test,\n title=\"{}\"\n}")}, {"Complete entry with empty title (2)", true, QStringLiteral("@entry{test,\n title=\"\"\n}")}, {"Complete entry with empty title (3)", true, QStringLiteral("@entry{test,\n title={{}}\n}")}, {"Complete entry with empty title (4)", true, QStringLiteral("@entry{test,\n title={}\n}")}, {"Entry abruptly ending at macro key as field value (1)", false, QStringLiteral("@entry{test,\n month = jan")}, {"Entry abruptly ending at macro key as field value (2)", false, QStringLiteral("@entry{test,\n month = jan\n")}, // TODO more tests {"Complete entry", true, QStringLiteral("@entry{test,\n title=\"{Abc Def}\",\n month = jan\n}")} }; for (const auto &bibTeXDataRow : bibTeXDataTable) QTest::newRow(bibTeXDataRow.label) << bibTeXDataRow.isValid << bibTeXDataRow.text; } void KBibTeXIOTest::partialBibTeXInput() { QFETCH(bool, isValid); QFETCH(QString, text); bool gotErrors = false; FileImporterBibTeX importer(this); connect(&importer, &FileImporter::message, [&gotErrors](const FileImporter::MessageSeverity messageSeverity, const QString &messageText) { gotErrors |= messageSeverity >= FileImporter::MessageSeverity::Error; Q_UNUSED(messageText) //qCDebug(LOG_KBIBTEX_TEST)<<"FileImporterBibTeX issues message during 'partialBibTeXInput' test: 
"<<messageText; }); QScopedPointer<File> bibTeXfile(importer.fromString(text)); QVERIFY(text.isEmpty() || isValid != gotErrors); QVERIFY(isValid ? (!bibTeXfile.isNull() && bibTeXfile->count() == 1) : (bibTeXfile.isNull() || bibTeXfile->count() == 0)); } void KBibTeXIOTest::partialRISInput_data() { QTest::addColumn<bool>("isValid"); QTest::addColumn<QString>("text"); static const struct RISDataTable { const char *label; const bool isValid; const QString text; } risDataTable[] = { //{"Empty string", false, QString()}, {"Incorrect year", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 5555/07//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27\nER -")}, {"Incorrect month", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 1948/17//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27\nER -")}, {"Entry does not end with 'ER'", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 1948/07//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27")}, // TODO more tests //{"Complete entry", true, QStringLiteral("TY - JOUR\nAU - Shannon, Claude E.\nPY - 1948/07//\nTI - A Mathematical Theory of Communication\nT2 - Bell System Technical Journal\nSP - 379\nEP - 423\nVL - 27\nER -")} }; for (const auto &risDataRow : risDataTable) QTest::newRow(risDataRow.label) << risDataRow.isValid << risDataRow.text; } void KBibTeXIOTest::partialRISInput() { QFETCH(bool, isValid); QFETCH(QString, text); bool gotErrors = false; FileImporterRIS importer(this); connect(&importer, &FileImporter::message, [&gotErrors](const FileImporter::MessageSeverity messageSeverity, const QString &messageText) { gotErrors |= messageSeverity >= FileImporter::MessageSeverity::Error; Q_UNUSED(messageText) //qCDebug(LOG_KBIBTEX_TEST)<<"FileImporterRIS issues message during 'partialBibTeXInput' test: 
"<<messageText; }); QScopedPointer<File> bibTeXfile(importer.fromString(text)); QVERIFY(text.isEmpty() || isValid != gotErrors); QVERIFY(isValid ? (!bibTeXfile.isNull() && bibTeXfile->count() == 1) : (bibTeXfile.isNull() || bibTeXfile->count() == 0)); } void KBibTeXIOTest::initTestCase() { qRegisterMetaType<FileImporter::MessageSeverity>(); } QTEST_MAIN(KBibTeXIOTest) #include "kbibtexiotest.moc"