diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4cc844f1..2ac31363 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,215 +1,216 @@ # cmake-options : -DCMAKE_DISABLE_FIND_PACKAGE_LibKonq=TRUE or FALSE; default is FALSE add_definitions(-DTRANSLATION_DOMAIN=\"konsole\") # When Qt5.9+ is required, consider using QOperatingSystemVersion ### Too many crashes/issues with detaching on MacOSX IF(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(ENABLE_DETACHING 0) else() set(ENABLE_DETACHING 1) endif() ### Handle DragonFlyBSD here instead of using __DragonFly__ IF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly") set(HAVE_OS_DRAGONFLYBSD 1) else() set(HAVE_OS_DRAGONFLYBSD 0) endif() include(CheckIncludeFiles) include(ECMAddAppIcon) configure_file(config-konsole.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config-konsole.h) ### Tests if(BUILD_TESTING) find_package(Qt5Test ${QT_MIN_VERSION} CONFIG REQUIRED) add_subdirectory(autotests) add_subdirectory(tests) endif() ### Security concerns about sendText and runCommand dbus methods being public option(REMOVE_SENDTEXT_RUNCOMMAND_DBUS_METHODS "Konsole: remove sendText and runCommand dbus methods" OFF) -### Font Embedder and LineFont.h +### Development tools option(KONSOLE_BUILD_FONTEMBEDDER "Konsole: build fontembedder executable" OFF) option(KONSOLE_GENERATE_LINEFONT "Konsole: regenerate LineFont file" OFF) +option(KONSOLE_BUILD_UNI2CHARACTERWIDTH "Konsole: build uni2characterwidth executable" OFF) ### Konsole source files shared between embedded terminal and main application # qdbuscpp2xml -m Session.h -o org.kde.konsole.Session.xml # qdbuscpp2xml -M -s ViewManager.h -o org.kde.konsole.Konsole.xml # Generate dbus .xml files; do not store .xml in source folder qt5_generate_dbus_interface(Session.h org.kde.konsole.Session.xml OPTIONS -m) qt5_generate_dbus_interface(ViewManager.h org.kde.konsole.Window.xml OPTIONS -m) qt5_add_dbus_adaptor(sessionadaptors_SRCS ${CMAKE_CURRENT_BINARY_DIR}/org.kde.konsole.Session.xml Session.h 
Konsole::Session) qt5_add_dbus_adaptor(windowadaptors_SRCS ${CMAKE_CURRENT_BINARY_DIR}/org.kde.konsole.Window.xml ViewManager.h Konsole::ViewManager) set(konsoleprivate_SRCS ${sessionadaptors_SRCS} ${windowadaptors_SRCS} BookmarkHandler.cpp ColorScheme.cpp ColorSchemeManager.cpp ColorSchemeEditor.cpp CopyInputDialog.cpp EditProfileDialog.cpp Emulation.cpp DetachableTabBar.cpp Filter.cpp History.cpp HistorySizeDialog.cpp HistorySizeWidget.cpp IncrementalSearchBar.cpp KeyBindingEditor.cpp KeyboardTranslator.cpp KeyboardTranslatorManager.cpp ProcessInfo.cpp Profile.cpp ProfileList.cpp ProfileReader.cpp ProfileWriter.cpp ProfileManager.cpp Pty.cpp RenameTabDialog.cpp RenameTabWidget.cpp Screen.cpp ScreenWindow.cpp ScrollState.cpp Session.cpp SessionController.cpp SessionManager.cpp SessionListModel.cpp ShellCommand.cpp TabTitleFormatButton.cpp TerminalCharacterDecoder.cpp ExtendedCharTable.cpp TerminalDisplay.cpp TerminalDisplayAccessible.cpp ViewContainer.cpp ViewManager.cpp ViewProperties.cpp ViewSplitter.cpp Vt102Emulation.cpp ZModemDialog.cpp PrintOptions.cpp konsole_wcwidth.cpp WindowSystemInfo.cpp ${CMAKE_CURRENT_BINARY_DIR}/org.kde.konsole.Window.xml ${CMAKE_CURRENT_BINARY_DIR}/org.kde.konsole.Session.xml) ecm_qt_declare_logging_category(konsoleprivate_SRCS HEADER konsoledebug.h IDENTIFIER KonsoleDebug CATEGORY_NAME org.kde.konsole) kconfig_add_kcfg_files(konsoleprivate_SRCS settings/KonsoleSettings.kcfgc) set(konsole_LIBS KF5::XmlGui Qt5::PrintSupport Qt5::Xml KF5::Notifications KF5::WindowSystem KF5::TextWidgets KF5::GuiAddons KF5::IconThemes KF5::Bookmarks KF5::I18n KF5::Pty KF5::KIOWidgets KF5::DBusAddons KF5::GlobalAccel KF5::NewStuff ) if(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") #kinfo_getfile() is in libutil list(APPEND konsole_LIBS util) endif() ### Konsole Application ki18n_wrap_ui(konsoleprivate_SRCS ColorSchemeEditor.ui CopyInputDialog.ui EditProfileDialog.ui KeyBindingEditor.ui RenameTabDialog.ui RenameTabWidget.ui HistorySizeDialog.ui 
HistorySizeWidget.ui PrintOptions.ui settings/FileLocationSettings.ui settings/GeneralSettings.ui settings/PartInfo.ui settings/ProfileSettings.ui settings/TabBarSettings.ui) # add the resource files for the ui files qt5_add_resources( konsoleprivate_SRCS ../desktop/konsole.qrc) add_library(konsoleprivate ${konsoleprivate_SRCS}) generate_export_header(konsoleprivate BASE_NAME konsoleprivate) target_link_libraries(konsoleprivate PUBLIC ${konsole_LIBS}) set_target_properties(konsoleprivate PROPERTIES VERSION ${KONSOLEPRIVATE_VERSION_STRING} SOVERSION ${KONSOLEPRIVATE_SOVERSION} ) install(TARGETS konsoleprivate ${KDE_INSTALL_TARGETS_DEFAULT_ARGS} LIBRARY NAMELINK_SKIP) set(konsole_KDEINIT_SRCS Application.cpp MainWindow.cpp main.cpp settings/FileLocationSettings.cpp settings/GeneralSettings.cpp settings/ProfileSettings.cpp settings/TabBarSettings.cpp) # Sets the icon on Windows and OSX file(GLOB ICONS_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/../data/icons/*.png") ecm_add_app_icon(kdeinit_konsole ICONS ${ICONS_SRCS}) kf5_add_kdeinit_executable(konsole ${konsole_KDEINIT_SRCS}) target_link_libraries(kdeinit_konsole konsoleprivate KF5::XmlGui KF5::WindowSystem KF5::Bookmarks KF5::I18n KF5::KIOWidgets KF5::NotifyConfig KF5::Crash ) if(APPLE) set_target_properties(konsole PROPERTIES MACOSX_BUNDLE_GUI_IDENTIFIER "org.kde.konsole" MACOSX_BUNDLE_BUNDLE_NAME "Konsole" MACOSX_BUNDLE_DISPLAY_NAME "Konsole" MACOSX_BUNDLE_INFO_STRING "Konsole, the KDE terminal emulator" MACOSX_BUNDLE_LONG_VERSION_STRING "Konsole ${KDE_APPLICATIONS_VERSION}" MACOSX_BUNDLE_SHORT_VERSION_STRING "${KDE_APPLICATIONS_VERSION_MAJOR}.${KDE_APPLICATIONS_VERSION_MINOR}" MACOSX_BUNDLE_BUNDLE_VERSION "${KDE_APPLICATIONS_VERSION}" MACOSX_BUNDLE_COPYRIGHT "1997-2016 The Konsole Developers") endif() install(TARGETS kdeinit_konsole konsole ${KDE_INSTALL_TARGETS_DEFAULT_ARGS}) ### Embedded Konsole KPart set(konsolepart_PART_SRCS Part.cpp settings/PartInfo.cpp settings/ProfileSettings.cpp) add_library(konsolepart MODULE 
${konsolepart_PART_SRCS}) generate_export_header(konsolepart BASE_NAME konsole) kcoreaddons_desktop_to_json(konsolepart ../desktop/konsolepart.desktop) set_target_properties(konsolepart PROPERTIES DEFINE_SYMBOL KONSOLE_PART) target_link_libraries(konsolepart KF5::Parts KF5::XmlGui konsoleprivate) install(TARGETS konsolepart DESTINATION ${KDE_INSTALL_PLUGINDIR}) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 7fcc7752..284160a1 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,19 +1,20 @@ ### konsoleprofile command-line tool install(PROGRAMS konsoleprofile DESTINATION ${KDE_INSTALL_BINDIR}) ### Line graphics font ### Attempting to auto-create LineFont.h for multiple systems is a headache. ### If LineFont.h is needed to be recreated use: ### fontembedder LineFont.src > LineFont.h ### Then commit the new LineFont.h if(KONSOLE_BUILD_FONTEMBEDDER OR KONSOLE_GENERATE_LINEFONT) find_package(Qt5Core ${QT_MIN_VERSION} CONFIG REQUIRED) ### Font Embedder set(fontembedder_SRCS fontembedder.cpp) add_executable(fontembedder ${fontembedder_SRCS}) target_link_libraries(fontembedder Qt5::Core) endif() +add_subdirectory( uni2characterwidth ) diff --git a/tools/uni2characterwidth/CMakeLists.txt b/tools/uni2characterwidth/CMakeLists.txt new file mode 100644 index 00000000..e1af4121 --- /dev/null +++ b/tools/uni2characterwidth/CMakeLists.txt @@ -0,0 +1,30 @@ +### uni2characterwidth +### +### Converts Unicode Character Database files into character width lookup +### tables. Uses a template file to place the tables in a source code file +### together with a function for finding the width for specified character. 
+###
+### See `uni2characterwidth --help` for usage information
+if(KONSOLE_BUILD_UNI2CHARACTERWIDTH) # opt-in; the option is declared in src/CMakeLists.txt
+
+    find_package(Qt5 ${QT_MIN_VERSION} CONFIG REQUIRED COMPONENTS
+        Core
+    )
+    find_package(KF5 ${KF5_MIN_VERSION} REQUIRED COMPONENTS
+        KIO
+    )
+
+    set(uni2characterwidth_SRC
+        uni2characterwidth.cpp
+        properties.h
+        template.cpp
+        template.h
+    )
+
+    add_executable(uni2characterwidth ${uni2characterwidth_SRC})
+    target_link_libraries(uni2characterwidth PRIVATE # executable: nothing links against it
+        Qt5::Core
+        KF5::KIOCore
+    )
+
+endif()
diff --git a/tools/uni2characterwidth/properties.h b/tools/uni2characterwidth/properties.h
new file mode 100644
index 00000000..1706ea9f
--- /dev/null
+++ b/tools/uni2characterwidth/properties.h
@@ -0,0 +1,78 @@
+#ifndef CATEGORY_PROPERTY_VALUE
+#define CATEGORY_PROPERTY_VALUE(val, sym, intVal)
+#endif
+#ifndef CATEGORY_PROPERTY_GROUP
+#define CATEGORY_PROPERTY_GROUP(val, sym, intVal)
+#endif
+
+CATEGORY_PROPERTY_VALUE(Lu, UppercaseLetter, 1<<0) // an uppercase letter
+CATEGORY_PROPERTY_VALUE(Ll, LowercaseLetter, 1<<1) // a lowercase letter
+CATEGORY_PROPERTY_VALUE(Lt, TitlecaseLetter, 1<<2) // a digraphic character, with first part uppercase
+CATEGORY_PROPERTY_GROUP(LC, CasedLetter, 1<<0|1<<1|1<<2)
+CATEGORY_PROPERTY_VALUE(Lm, ModifierLetter, 1<<3) // a modifier letter
+CATEGORY_PROPERTY_VALUE(Lo, OtherLetter, 1<<4) // other letters, including syllables and ideographs
+CATEGORY_PROPERTY_GROUP(L, Letter, 1<<0|1<<1|1<<2|1<<3|1<<4)
+CATEGORY_PROPERTY_VALUE(Mn, NonspacingMark, 1<<5) // a nonspacing combining mark (zero advance width)
+CATEGORY_PROPERTY_VALUE(Mc, SpacingMark, 1<<6) // a spacing combining mark (positive advance width)
+CATEGORY_PROPERTY_VALUE(Me, EnclosingMark, 1<<7) // an enclosing combining mark
+CATEGORY_PROPERTY_GROUP(M, Mark, 1<<5|1<<6|1<<7)
+CATEGORY_PROPERTY_VALUE(Nd, DecimalNumber, 1<<8) // a decimal digit
+CATEGORY_PROPERTY_VALUE(Nl, LetterNumber, 1<<9) // a letterlike numeric character
+CATEGORY_PROPERTY_VALUE(No, OtherNumber, 1<<10) // a numeric character of other type
+CATEGORY_PROPERTY_GROUP(N, Number, 1<<8|1<<9|1<<10) +CATEGORY_PROPERTY_VALUE(Pc, ConnectorPunctuation, 1<<11) // a connecting punctuation mark, like a tie +CATEGORY_PROPERTY_VALUE(Pd, DashPunctuation, 1<<12) // a dash or hyphen punctuation mark +CATEGORY_PROPERTY_VALUE(Ps, OpenPunctuation, 1<<13) // an opening punctuation mark (of a pair) +CATEGORY_PROPERTY_VALUE(Pe, ClosePunctuation, 1<<14) // a closing punctuation mark (of a pair) +CATEGORY_PROPERTY_VALUE(Pi, InitialPunctuation, 1<<15) // an initial quotation mark +CATEGORY_PROPERTY_VALUE(Pf, FinalPunctuation, 1<<16) // a final quotation mark +CATEGORY_PROPERTY_VALUE(Po, OtherPunctuation, 1<<17) // a punctuation mark of other type +CATEGORY_PROPERTY_GROUP(P, Punctuation, 1<<11|1<<12|1<<13|1<<14|1<<15|1<<16|1<<17) +CATEGORY_PROPERTY_VALUE(Sm, MathSymbol, 1<<18) // a symbol of mathematical use +CATEGORY_PROPERTY_VALUE(Sc, CurrencySymbol, 1<<19) // a currency sign +CATEGORY_PROPERTY_VALUE(Sk, ModifierSymbol, 1<<20) // a non-letterlike modifier symbol +CATEGORY_PROPERTY_VALUE(So, OtherSymbol, 1<<21) // a symbol of other type +CATEGORY_PROPERTY_GROUP(S, Symbol, 1<<18|1<<19|1<<20|1<<21) +CATEGORY_PROPERTY_VALUE(Zs, SpaceSeparator, 1<<22) // a space character (of various non-zero widths) +CATEGORY_PROPERTY_VALUE(Zl, LineSeparator, 1<<23) // U+2028 LINE SEPARATOR only +CATEGORY_PROPERTY_VALUE(Zp, ParagraphSeparator, 1<<24) // U+2029 PARAGRAPH SEPARATOR only +CATEGORY_PROPERTY_GROUP(Z, Separator, 1<<22|1<<23|1<<24) +CATEGORY_PROPERTY_VALUE(Cc, Control, 1<<25) // a C0 or C1 control code +CATEGORY_PROPERTY_VALUE(Cf, Format, 1<<26) // a format control character +CATEGORY_PROPERTY_VALUE(Cs, Surrogate, 1<<27) // a surrogate code point +CATEGORY_PROPERTY_VALUE(Co, PrivateUse, 1<<28) // a private-use character +CATEGORY_PROPERTY_VALUE(Cn, Unassigned, 1<<29) // a reserved unassigned code point or a noncharacter +CATEGORY_PROPERTY_GROUP(C, Other, 1<<25|1<<26|1<<27|1<<28|1<<29) + +#undef CATEGORY_PROPERTY_VALUE +#undef 
CATEGORY_PROPERTY_GROUP
+
+/**************************************/
+
+#ifndef EAST_ASIAN_WIDTH_PROPERTY_VALUE
+#define EAST_ASIAN_WIDTH_PROPERTY_VALUE(val, sym, intVal)
+#endif
+
+EAST_ASIAN_WIDTH_PROPERTY_VALUE(A, Ambiguous, 1<<0)
+EAST_ASIAN_WIDTH_PROPERTY_VALUE(F, Fullwidth, 1<<1)
+EAST_ASIAN_WIDTH_PROPERTY_VALUE(H, Halfwidth, 1<<2)
+EAST_ASIAN_WIDTH_PROPERTY_VALUE(N, Neutral, 1<<3)
+EAST_ASIAN_WIDTH_PROPERTY_VALUE(Na, Narrow, 1<<4)
+EAST_ASIAN_WIDTH_PROPERTY_VALUE(W, Wide, 1<<5)
+
+#undef EAST_ASIAN_WIDTH_PROPERTY_VALUE
+
+/**************************************/
+
+#ifndef EMOJI_PROPERTY_VALUE
+#define EMOJI_PROPERTY_VALUE(val, sym, intVal)
+#endif
+
+EMOJI_PROPERTY_VALUE(, None, 0)
+EMOJI_PROPERTY_VALUE(Emoji, Emoji, 1<<0)
+EMOJI_PROPERTY_VALUE(Emoji_Presentation, EmojiPresentation, 1<<1)
+EMOJI_PROPERTY_VALUE(Emoji_Modifier, EmojiModifier, 1<<2)
+EMOJI_PROPERTY_VALUE(Emoji_Modifier_Base, EmojiModifier_Base, 1<<3)
+EMOJI_PROPERTY_VALUE(Emoji_Component, EmojiComponent, 1<<4)
+
+#undef EMOJI_PROPERTY_VALUE
diff --git a/tools/uni2characterwidth/template.cpp b/tools/uni2characterwidth/template.cpp
new file mode 100644
index 00000000..8198b667
--- /dev/null
+++ b/tools/uni2characterwidth/template.cpp
@@ -0,0 +1,446 @@
+/*
+    This file is part of Konsole, a terminal emulator for KDE.
+
+    Copyright 2018 by Mariusz Glebocki
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+*/
+
+#include <QDebug>
+#include <QRegularExpression>
+#include <QRegularExpressionMatch>
+#include <QString>
+#include <QStringList>
+#include <QStringRef>
+#include "template.h"
+
+// Returns a copy of str in which every backslash-escaped character (\x) is
+// replaced with the character itself (x).
+static const QString unescape(const QStringRef &str) {
+    QString result;
+    result.reserve(str.length());
+    for(int i = 0; i < str.length(); ++i) {
+        if(str[i] == QLatin1Char('\\') && i < str.length() - 1)
+            result += str[++i];
+        else
+            result += str[i];
+    }
+    return result;
+}
+
+// Var::Number is 64 bits wide, while formats accepted by the "!fmt" command
+// have no length modifier. Insert "ll" before the conversion character so the
+// format matches the argument passed to QString::asprintf().
+static QString toLongLongFmt(const QString &fmt) {
+    QString result = fmt;
+    result.insert(result.size() - 1, QStringLiteral("ll"));
+    return result;
+}
+
+//
+// Template::Element
+//
+// Returns the nearest format set on this element or its ancestors that is
+// valid for the given type, or the default format when there is none.
+const QString Template::Element::findFmt(Var::DataType type) const {
+    const Template::Element *element;
+    for(element = this; element != nullptr; element = element->parent) {
+        if(!element->fmt.isEmpty() && isValidFmt(element->fmt, type)) {
+            return element->fmt;
+        }
+    }
+    return defaultFmt(type);
+}
+
+// Returns the dot-separated path from the root to this element. Elements for
+// which hasName() is false are shown as their index in the parent's children.
+QString Template::Element::path() const {
+    QStringList namesList;
+    const Template::Element *element;
+    for(element = this; element != nullptr; element = element->parent) {
+        if(!element->hasName() && element->parent != nullptr) {
+            QString anonName = QStringLiteral("[anon]");
+            for(int i = 0; i < element->parent->children.size(); ++i) {
+                if(&element->parent->children[i] == element) {
+                    anonName = QStringLiteral("[%1]").arg(i);
+                    break;
+                }
+            }
+            namesList.prepend(anonName);
+        } else {
+            namesList.prepend(element->name);
+        }
+    }
+    return namesList.join(QLatin1Char('.'));
+}
+
+// Returns the format used when no valid "!fmt" format applies.
+const QString Template::Element::defaultFmt(Var::DataType type) {
+    switch(type) {
+    case Var::DataType::Number: return QStringLiteral("%d");
+    case Var::DataType::String: return QStringLiteral("%s");
+    default: Q_UNREACHABLE();
+    }
+}
+
+// Tells whether the printf-like format can be used with the given data type.
+// In this file formats are only set by the "!fmt" command, which accepts
+// integer conversions and "s", so checking the last character is sufficient.
+bool Template::Element::isValidFmt(const QString &fmt, Var::DataType type) {
+    switch(type) {
+    case Var::DataType::String: return fmt.endsWith(QLatin1Char('s'));
+    case Var::DataType::Number: return !fmt.endsWith(QLatin1Char('s')); // "%s" must never receive an integer
+    default: return false;
+    }
+}
+
+//
+// Template
+//
+
+// Stores the template text and sets up an empty root element covering it.
+Template::Template(const QString &text): _text(text) {
+    _root.name = QStringLiteral("[root]");
+    _root.outer = QStringRef(&_text);
+    _root.inner = QStringRef(&_text);
+    _root.parent = nullptr;
+    _root.line = 1;
+    _root.column = 1;
+}
+
+// Rebuilds the element tree from the template text.
+void Template::parse() {
+    _root.children.clear();
+    _root.outer = QStringRef(&_text);
+    _root.inner = QStringRef(&_text);
+    parseRecursively(_root);
+//    dbgDumpTree(_root);
+}
+
+// Returns the template text with elements replaced by values from data.
+QString Template::generate(const Var &data) {
+    QString result;
+    result.reserve(_text.size());
+    generateRecursively(result, _root, data);
+    return result;
+}
+
+// Prints a warning prefixed with the element's position and path.
+static inline void warn(const Template::Element &element, const QString &id, const QString &msg) {
+    const QString path = id.isEmpty() ? element.path() : Template::Element(&element, id).path();
+    qWarning() << QStringLiteral("Warning: %1:%2: %3: %4").arg(element.line).arg(element.column).arg(path, msg);
+}
+static inline void warn(const Template::Element &element, const QString &msg) {
+    warn(element, QString(), msg);
+}
+
+// Executes the command argv[0] ("repeat" or "fmt") found in element. childStub
+// describes the command's own element; it is appended to element's children
+// unless the repeat count is invalid. Unknown commands are ignored.
+void Template::executeCommand(Element &element, const Template::Element &childStub, const QStringList &argv) {
+    if(argv.isEmpty())
+        return;
+
+    // Insert content N times
+    if(argv[0] == QStringLiteral("repeat")) {
+        bool ok;
+        const int count = argv.value(1).toInt(&ok); // signed, so negative counts are rejected below
+        if(!ok || count < 1) {
+            warn(element, QStringLiteral("!") + argv[0], QStringLiteral("invalid repeat count (%1), assuming 0.").arg(argv.value(1)));
+            return;
+        }
+
+        element.children.append(childStub);
+        Template::Element &cmdElement = element.children.last();
+        if(!cmdElement.inner.isEmpty()) {
+            // Parse children
+            parseRecursively(cmdElement);
+            // Remember how many children there were before replication
+            int originalChildrenCount = cmdElement.children.size();
+            // Replicate children
+            for(int i = 1; i < count; ++i) {
+                for(int chId = 0; chId < originalChildrenCount; ++chId) {
+                    cmdElement.children.append(cmdElement.children[chId]);
+                }
+            }
+        }
+    // Set printf-like format (with leading %) applied for strings and numbers
+    // inside the group
+    } else if(argv[0] == QStringLiteral("fmt")) {
+        static const QRegularExpression FMT_RE(QStringLiteral(R":(^%[-0 +#]?(?:[1-9][0-9]*)?\.?[0-9]*[diouxXs]$):"),
+                                               QRegularExpression::OptimizeOnFirstUsageOption);
+        const auto match = FMT_RE.match(argv.value(1));
+        QString fmt = QStringLiteral("");
+        if(!match.hasMatch())
+            warn(element, QStringLiteral("!") + argv[0], QStringLiteral("invalid format (%1), assuming default").arg(argv.value(1)));
+        else
+            fmt = match.captured();
+
+        element.children.append(childStub);
+        Template::Element &cmdElement = element.children.last();
+        cmdElement.fmt = fmt;
+        parseRecursively(cmdElement);
+    }
+}
+
+// Finds comments, commands and (named or anonymous) elements inside
+// element.inner, appends them to element.children and descends into the
+// non-empty ones. Line and column of every child are recorded for warnings.
+void Template::parseRecursively(Element &element) {
+    static const QRegularExpression RE(QStringLiteral(R":((?'comment'«\*(([^:]*):)?.*?(?(-2):\g{-1})\*»)|):"
+                                                      R":(«(?:(?'name'[-_a-zA-Z0-9]*)|(?:!(?'cmd'[-_a-zA-Z0-9]+(?: +(?:[^\\:]+|(?:\\.)+)+)?)))):"
+                                                      R":((?::(?:~[ \t]*\n)?(?'inner'(?:[^«]*?|(?R))*))?(?:\n[ \t]*~)?»):"),
+                                       QRegularExpression::DotMatchesEverythingOption | QRegularExpression::MultilineOption |
+                                       QRegularExpression::OptimizeOnFirstUsageOption);
+    static const QRegularExpression CMD_SPLIT_RE(QStringLiteral(R":((?:"((?:(?:\\.)*|[^"]*)*)"|(?:[^\\ "]+|(?:\\.)+)+)):"),
+                                                 QRegularExpression::DotMatchesEverythingOption | QRegularExpression::MultilineOption |
+                                                 QRegularExpression::OptimizeOnFirstUsageOption);
+    static const QRegularExpression UNESCAPE_RE(QStringLiteral(R":(\\(.)):"),
+                                                QRegularExpression::DotMatchesEverythingOption | QRegularExpression::MultilineOption |
+                                                QRegularExpression::OptimizeOnFirstUsageOption);
+    static const QString nameGroupName = QStringLiteral("name");
+    static const QString innerGroupName = QStringLiteral("inner");
+    static const QString cmdGroupName = QStringLiteral("cmd");
+    static const QString commentGroupName = QStringLiteral("comment");
+
+    int posOffset = element.outer.position();
+    uint posLine = element.line;
+    uint posColumn = element.column;
+
+    auto matchIter = RE.globalMatch(element.inner);
+    while(matchIter.hasNext()) {
+        auto match = matchIter.next();
+        auto cmd = match.captured(cmdGroupName);
+        auto comment = match.captured(commentGroupName);
+
+        const auto localOuterRef = match.capturedRef(0);
+        const auto localInnerRef = match.capturedRef(innerGroupName);
+
+        auto outerRef = QStringRef(&_text, localOuterRef.position(), localOuterRef.length());
+        auto innerRef = QStringRef(&_text, localInnerRef.position(), localInnerRef.length());
+
+        while(posOffset < outerRef.position() && posOffset < _text.size()) {
+            if(_text[posOffset++] == QLatin1Char('\n')) {
+                ++posLine;
+                posColumn = 1;
+            } else {
+                ++posColumn;
+            }
+        }
+
+        if(!cmd.isEmpty()) {
+            QStringList cmdArgv;
+            auto cmdArgIter = CMD_SPLIT_RE.globalMatch(cmd);
+            while(cmdArgIter.hasNext()) {
+                auto cmdArg = cmdArgIter.next();
+                cmdArgv += cmdArg.captured(cmdArg.captured(1).isEmpty() ? 0 : 1);
+                cmdArgv.last().replace(UNESCAPE_RE, QStringLiteral("\\1")); // back-reference to the escaped character
+            }
+
+            Template::Element childStub = Template::Element(&element);
+            childStub.outer = outerRef;
+            childStub.name = QLatin1Char('!') + cmd;
+            childStub.inner = innerRef;
+            childStub.line = posLine;
+            childStub.column = posColumn;
+            executeCommand(element, childStub, cmdArgv);
+        } else if (!comment.isEmpty()) {
+            element.children.append(Element(&element));
+            Template::Element &child = element.children.last();
+            child.outer = outerRef;
+            child.name = QString();
+            child.inner = QStringRef();
+            child.line = posLine;
+            child.column = posColumn;
+            child.isComment = true;
+        } else {
+            element.children.append(Element(&element));
+            Template::Element &child = element.children.last();
+            child.outer = outerRef;
+            child.name = match.captured(nameGroupName);
+            child.inner = innerRef;
+            child.line = posLine;
+            child.column = posColumn;
+            if(!child.inner.isEmpty())
+                parseRecursively(child);
+        }
+    }
+}
+
+// Appends to result the text generated for element from data. Elements with
+// children are repeated until all data items are consumed (commands make a
+// single pass); leaf elements print one value using the format in effect.
+// Returns the number of data items consumed.
+int Template::generateRecursively(QString &result, const Template::Element &element, const Var &data, int consumed) {
+    int consumedDataItems = consumed;
+
+    if(!element.children.isEmpty()) {
+        int totalDataItems;
+        switch(data.dataType()) {
+            case Var::DataType::Number:
+            case Var::DataType::String:
+            case Var::DataType::Map:
+                totalDataItems = 1;
+                break;
+            case Var::DataType::Vector:
+                totalDataItems = data.vec.size();
+                break;
+            case Var::DataType::Invalid:
+            default:
+                Q_UNREACHABLE();
+        }
+
+        while(consumedDataItems < totalDataItems) {
+            const int consumedBeforePass = consumedDataItems;
+            int prevChildEndPosition = element.inner.position();
+            for(const auto &child: element.children) {
+                const int characterCountBetweenChildren = child.outer.position() - prevChildEndPosition;
+                if(characterCountBetweenChildren > 0) {
+                    // Add text between previous child (or inner beginning) and this child.
+                    result += unescape(_text.midRef(prevChildEndPosition, characterCountBetweenChildren));
+                } else if(characterCountBetweenChildren < 0) {
+                    // Repeated item; they overlap and end1 > start2
+                    result += unescape(element.inner.mid(prevChildEndPosition - element.inner.position()));
+                    result += unescape(element.inner.left(child.outer.position() - element.inner.position()));
+                }
+
+                switch(data.dataType()) {
+                    case Var::DataType::Number:
+                    case Var::DataType::String:
+                        generateRecursively(result, child, data);
+                        consumedDataItems = 1; // Deepest child always consumes number/string
+                        break;
+                    case Var::DataType::Vector:
+                        if(!data.vec.isEmpty()) {
+                            if(!child.hasName() && !child.isCommand() && consumedDataItems < data.vec.size()) {
+                                consumedDataItems += generateRecursively(result, child, data[consumedDataItems]);
+                            } else {
+                                consumedDataItems += generateRecursively(result, child, data.vec.mid(consumedDataItems));
+                            }
+                        } else {
+                            warn(child, QStringLiteral("no more items available in parent's list."));
+                        }
+                        break;
+                    case Var::DataType::Map:
+                        if(!child.hasName()) {
+                            consumedDataItems = generateRecursively(result, child, data);
+                        } else if(data.map.contains(child.name)) {
+                            generateRecursively(result, child, data.map[child.name]);
+                            // Always consume, repeating doesn't change anything
+                            consumedDataItems = 1;
+                        } else {
+                            warn(child, QStringLiteral("missing value for the element in parent's map."));
+                        }
+                        break;
+                    default:
+                        break;
+                }
+                prevChildEndPosition = child.outer.position() + child.outer.length();
+            }
+
+            result += unescape(element.inner.mid(prevChildEndPosition - element.inner.position(), -1));
+
+            if(element.isCommand()) {
+                break;
+            }
+
+            // A pass which consumed nothing would be repeated forever
+            if(consumedDataItems == consumedBeforePass) {
+                break;
+            }
+
+            const bool isLast = consumedDataItems >= totalDataItems;
+            if(!isLast) {
+                // Collapse empty lines between elements
+                int nlNum = 0;
+                for(int i = 0; i < element.inner.size() / 2; ++i) {
+                    if(element.inner.at(i) == QLatin1Char('\n') &&
+                       element.inner.at(i) == element.inner.at(element.inner.size() - i - 1))
+                        nlNum++;
+                    else
+                        break;
+                }
+                if(nlNum > 0)
+                    result.chop(nlNum);
+            }
+        }
+    } else if (!element.isComment) {
+        // Handle leaf element
+        switch(data.dataType()) {
+        case Var::DataType::Number: {
+            const QString fmt = toLongLongFmt(element.findFmt(Var::DataType::Number));
+            result += QString::asprintf(qUtf8Printable(fmt), data.num);
+            break;
+        }
+        case Var::DataType::String: {
+            const QString fmt = element.findFmt(Var::DataType::String);
+            result += QString::asprintf(qUtf8Printable(fmt), qUtf8Printable(data.str));
+            break;
+        }
+        case Var::DataType::Vector:
+            if(data.vec.isEmpty()) {
+                warn(element, QStringLiteral("got empty list."));
+            } else if(data.vec.at(0).dataType() == Var::DataType::Number) {
+                const QString fmt = toLongLongFmt(element.findFmt(Var::DataType::Number));
+                result += QString::asprintf(qUtf8Printable(fmt), data.vec.at(0).num); // first entry, not the vector's own union
+            } else if(data.vec.at(0).dataType() == Var::DataType::String) {
+                const QString fmt = element.findFmt(Var::DataType::String);
+                result += QString::asprintf(qUtf8Printable(fmt), qUtf8Printable(data.vec.at(0).str));
+            } else {
+                warn(element, QStringLiteral("the list entry data type (%1) is not supported in childrenless elements.").
+                     arg(data.vec.at(0).dataTypeAsString()));
+            }
+            break;
+        case Var::DataType::Map:
+            warn(element, QStringLiteral("map type is not supported in childrenless elements."));
+            break;
+        case Var::DataType::Invalid:
+            break;
+        }
+        consumedDataItems = 1;
+    }
+
+    return consumedDataItems;
+}
+
+/*
+void dbgDumpTree(const Template::Element &element) {
+    static int indent = 0;
+    QString type;
+    if(element.isCommand())
+        type = QStringLiteral("command");
+    else if(element.isComment)
+        type = QStringLiteral("comment");
+    else if(element.hasName() && element.inner.isEmpty())
+        type = QStringLiteral("empty named");
+    else if(element.hasName())
+        type = QStringLiteral("named");
+    else if(element.inner.isEmpty())
+        type = QStringLiteral("empty anonymous");
+    else
+        type = QStringLiteral("anonymous");
+
+    qDebug().noquote() << QStringLiteral("%1[%2] \"%3\" %4:%5")
+            .arg(QStringLiteral("· ").repeated(indent), type, element.name)
+            .arg(element.line)
+            .arg(element.column);
+    indent++;
+    for(const auto &child: element.children) {
+        dbgDumpTree(child);
+    }
+    indent--;
+}
+*/
diff --git a/tools/uni2characterwidth/template.example b/tools/uni2characterwidth/template.example
new file mode 100644
index 00000000..7768c94b
--- /dev/null
+++ b/tools/uni2characterwidth/template.example
@@ -0,0 +1,77 @@
+«*COMMENT:----------------------------------------------------------------------
+
+Tags:
+
+«*anything:comment where everything but closing sequence is allowed:anything*»
+
+«NAME:any content, including other tags. \: have to be escaped. It is processed
+using data passed from code() function under NAME key.
It should contain other
+tags, without them this text will be replaced with passed data or removed.»
+
+«NAME» - like before, used when data should replace it, so content is
+         unnecessary
+
+EXAMPLE:
+data: Map{ "exampleA", Map{ { "Number", 42 }, { "String", "hello" } } }
+template: «exampleA:number\: «Number», string\: «String»»
+result: number: 42, string: hello
+
+«» - empty anonymous element. Used in named elements which receive lists.
+     The element will be replaced with list item, and duplicated if the list contains more items.
+
+«:anonymous container. It should contain some elements which receive data.
+The element disappears when its child element does not receive any value.
+Useful to add suffixes/prefixes to data»
+
+EXAMPLE:
+data: Map{ "exampleB", Vector{ 1, 2, 3, 4, 5, 6, 7 } }
+template: «exampleB:«:[«»] »»
+result: [1] [2] [3] [4] [5] [6] [7]
+
+data: Map{ "exampleC", Vector{ "a", "b", "c" } }
+template: «exampleC:«:first = «»»«:, second = «»»«:, third = «»»«:, fourth = «»»»
+result: first = a, second = b, third = c
+
+«!fmt "XXX":a wrapper which sets printf-like format XXX for numbers and
+strings inside it. Starts with %.»
+
+«!repeat N:repeats contents inside N times.»
+
+EXAMPLE:
+data: Map{ "exampleD", Vector{ 1, 2, 3, 4, 10, 11, 12, 13 } }
+template: «exampleD:«!fmt "%#.2x":«!repeat 3:«» »«»; »»
+result: 0x01 0x02 0x03 0x04; 0x0a 0x0b 0x0c 0x0d;
+
+D: «exampleD:«!fmt "%#.2x":«!repeat 3:«» »«»; »»
+----------------------------------------------------------------------:COMMENT*»
+For available data see code() function. Below are usage examples
+
+Warning about generated file - putting "this is a generated file" text in a
+template file could be misleading.
+«gen-file-warning»
+
+
+Command used to generate the file:
+«cmdline»
+
+
+Direct LUT - widths of the first 256 code points in direct access array:
+{«!fmt "% d":«direct-lut:
+    «!repeat 32:«:«»,»»
+»»}
+
+
+Arrays with code point ranges for every width:
+«ranges-luts:«:
+«name» = {«!fmt "%#.6x":«ranges:
+    «!repeat 8:«:{«first»,«last»},»»
+»»}
+Number of elements in the array: «size»
+
+»»
+List of array names, sizes, and widths:
+{«ranges-lut-list:
+    «:{«!fmt "% d":«width»», «!fmt "%-16s":«name»», «size»},»
+»}
+Number of elements in the array: «ranges-lut-list-size»;
+
diff --git a/tools/uni2characterwidth/template.h b/tools/uni2characterwidth/template.h
new file mode 100644
index 00000000..4d4c0b71
--- /dev/null
+++ b/tools/uni2characterwidth/template.h
@@ -0,0 +1,207 @@
+/*
+    This file is part of Konsole, a terminal emulator for KDE.
+
+    Copyright 2018 by Mariusz Glebocki
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+*/
+
+#ifndef TEMPLATE_H
+#define TEMPLATE_H
+
+#include <QMap>
+#include <QString>
+#include <QVector>
+
+// Backward compatibility
+#if QT_VERSION < QT_VERSION_CHECK(5, 7, 0) && !defined(qAsConst)
+#define qAsConst(code) code
+#endif
+
+// QVariant doesn't offer modification in place. Var does.
+// Tagged union holding a Number, String, Vector or Map; dataType() tells
+// which member of the anonymous union below is currently alive.
+class Var {
+public:
+    using Number = qint64;
+    using String = QString;
+    using Map = QMap<QString, Var>;
+    using Vector = QVector<Var>;
+
+    enum class DataType {
+        Invalid,
+        Number,
+        String,
+        Vector,
+        Map,
+    };
+
+    const QString dataTypeAsString() const {
+        switch(dataType()) {
+        case DataType::Invalid: return QStringLiteral("Invalid");
+        case DataType::Number: return QStringLiteral("Number");
+        case DataType::String: return QStringLiteral("String");
+        case DataType::Vector: return QStringLiteral("Vector");
+        case DataType::Map: return QStringLiteral("Map");
+        default: return QStringLiteral("Unknown?");
+        }
+    }
+
+    Var(): num(0), _dataType(DataType::Invalid) {}
+    Var(const Var &other) { construct(other); }
+
+    Var(const Number &newNum): _dataType(DataType::Number) { new(&num) auto(newNum); }
+    Var(const String &newStr): _dataType(DataType::String) { new(&str) auto(newStr); }
+    Var(const Vector &newVec): _dataType(DataType::Vector) { new(&vec) auto(newVec); }
+    Var(const Map &newMap): _dataType(DataType::Map) { new(&map) auto(newMap); }
+
+    // Allow initialization without type name
+    Var(const char * newStr): _dataType(DataType::String) { new(&str) String(QString::fromUtf8(newStr)); }
+    Var(std::initializer_list<Var> newVec): _dataType(DataType::Vector) { new(&vec) Vector(newVec); }
+
+    ~Var() { destroy(); }
+
+    // Replaces the held value with a copy of other's value, releasing the
+    // previously held String/Vector/Map first.
+    Var & operator=(const Var &other) {
+        if(this != &other) {
+            // Take the copy before destroy(): other may be stored inside this
+            // object's own vector or map.
+            const Var tmp(other);
+            destroy();
+            construct(tmp);
+        }
+        return *this;
+    }
+
+    Var & operator[](unsigned index) {
+        Q_ASSERT(_dataType == DataType::Vector);
+        return vec.data()[index];
+    }
+    const Var & operator[](unsigned index) const {
+        Q_ASSERT(_dataType == DataType::Vector);
+        return vec.constData()[index];
+    }
+    Var & operator[](const String &key) {
+        Q_ASSERT(_dataType == DataType::Map);
+        return map[key];
+    }
+    const Var & operator[](const String &key) const {
+        Q_ASSERT(_dataType == DataType::Map);
+        Q_ASSERT(map.contains(key)); // find() would return end() otherwise
+        return *map.find(key);
+    }
+
+    DataType dataType() const { return _dataType; }
+
+    union {
+        Number num;
+        String str;
+        Vector vec;
+        Map map;
+    };
+
+private:
+    // Copy-constructs the union member selected by other's type. Must only be
+    // called while no String/Vector/Map member is alive.
+    void construct(const Var &other) {
+        _dataType = other._dataType;
+        switch(_dataType) {
+        case DataType::Number: new(&num) auto(other.num); break;
+        case DataType::String: new(&str) auto(other.str); break;
+        case DataType::Vector: new(&vec) auto(other.vec); break;
+        case DataType::Map: new(&map) auto(other.map); break;
+        default: num = 0; break;
+        }
+    }
+
+    // Ends the lifetime of the active union member and marks the value Invalid.
+    void destroy() {
+        switch(_dataType) {
+        case DataType::String: str.~String(); break;
+        case DataType::Vector: vec.~Vector(); break;
+        case DataType::Map: map.~Map(); break;
+        default: break;
+        }
+        _dataType = DataType::Invalid;
+    }
+
+    DataType _dataType;
+};
+
+// Parses a template text into a tree of elements and fills it with data.
+class Template {
+public:
+    Template(const QString &text);
+    void parse();
+    QString generate(const Var &data);
+
+    // Node of the parsed template tree; outer and inner reference the template text.
+    struct Element {
+        Element(const Element *parent = nullptr, const QString &name = QString())
+            : outer()
+            , inner()
+            , name(name)
+            , fmt()
+            , line(0)
+            , column(0)
+            , isComment(false)
+            , children()
+            , parent(parent) {}
+
+        Element(const Element &other)
+            : outer(other.outer)
+            , inner(other.inner)
+            , name(other.name)
+            , fmt(other.fmt)
+            , line(other.line)
+            , column(other.column)
+            , isComment(other.isComment)
+            , parent(other.parent) {
+            for(const auto &child: other.children) {
+                children.append(child);
+            }
+        }
+
+        const QString findFmt(Var::DataType type) const;
+        QString path() const;
+        bool isCommand() const { return name.startsWith(QLatin1Char('!')); }
+        bool hasName() const { return !isCommand() && !name.isEmpty(); }
+
+        static const QString defaultFmt(Var::DataType type);
+        static bool isValidFmt(const QString &fmt, Var::DataType type);
+
+        QStringRef outer;
+        QStringRef inner;
+        QString name;
+        QString fmt;
+        uint line;
+        uint column;
+        bool isComment;
+        QList<Element> children;
+        const Element *parent;
+    };
+private:
+
+    void executeCommand(Element &element, const Element &childStub, const QStringList &argv);
+    void parseRecursively(Element &element);
+    int generateRecursively(QString &result, const Element &element, const Var &data, int consumed = 0);
+
+    QString _text; // FIXME: make it pointer (?)
+    Element _root; // FIXME: make it pointer
+};
+
+#endif
diff --git a/tools/uni2characterwidth/uni2characterwidth.cpp b/tools/uni2characterwidth/uni2characterwidth.cpp
new file mode 100644
index 00000000..131df0bc
--- /dev/null
+++ b/tools/uni2characterwidth/uni2characterwidth.cpp
@@ -0,0 +1,1011 @@
+/*
+    This file is part of Konsole, a terminal emulator for KDE.
+
+    Copyright 2018 by Mariusz Glebocki
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "template.h" + +#include + +// Backward compatibility +#if QT_VERSION < QT_VERSION_CHECK(5, 7, 0) +#define qAsConst(code) code +#endif + + + +static constexpr unsigned int CODE_POINTS_NUM = 0x110000; +static constexpr unsigned int LAST_CODE_POINT = CODE_POINTS_NUM - 1; + +struct UcdEntry { + struct { uint first; uint last; } cp; + QStringList fields; +}; + +class UcdParserBase { +public: + ~UcdParserBase() { + _source->close(); + } + + bool hasNext() { + bool hadNext = _hasNext; + if(!_nextFetched) { + _hasNext = fetchNext(); + _nextFetched = true; + } + return hadNext; + } + +protected: + UcdParserBase(QIODevice *source, UcdEntry *entry) + : _source(source) + , _nextFetched(false) + , _hasNext(true) + , _lineNo(0) + , _entry(entry) + { + Q_ASSERT(_source); + Q_ASSERT(_entry); + } + + bool fetchNext() { + Q_ASSERT(_source->isOpen()); + if(!_source->isOpen()) + return false; + + static const QRegularExpression ENTRY_RE = QRegularExpression(QStringLiteral( + // Match 1: "cp1" - first CP / "cp2" (optional) - last CP + R"#((?:^(?[[:xdigit:]]+)(?:\.\.(?[[:xdigit:]]+))?[ \t]*;)#" + // Match 1: "field0" - first data field" + // "udRangeInd" (UnicodeData.txt only) - if present, the line is either first or last line of a range + R"#([ \t]*(?[^#;\n]*?(?:, (?First|Last)>)?)[ \t]*(?:;|(?:\#.*)?$))|)#" + // Match 2..n: "field" - n-th field + R"#((?:\G(?<=;)[ \t]*(?[^#;\n]*?)[ \t]*(?:;|(?:#.*)?$)))#"), + QRegularExpression::OptimizeOnFirstUsageOption + ); + static const QRegularExpression UD_RANGE_IND_RE(QStringLiteral(", (First|Last)")); + static const QRegularExpression COMMENT_RE(QStringLiteral("^[ \t]*(#.*)?$")); + + QString line; + bool ok; + _entry->fields.clear(); + while(!_source->atEnd()) { + line = QString::fromUtf8(_source->readLine()); + _lineNo++; + auto mit = ENTRY_RE.globalMatch(line); + if(!mit.hasNext()) { + // Do not 
complain about comments and empty lines + if(!COMMENT_RE.match(line).hasMatch()) + qDebug() << QStringLiteral("Line %1: does not match - skipping").arg(_lineNo); + continue; + } + + auto match = mit.next(); + _entry->cp.first = match.captured(QStringLiteral("cp1")).toUInt(&ok, 16); + if(!ok) { + qDebug() << QStringLiteral("Line %d Invalid cp1 - skipping").arg(_lineNo); + continue; + } + _entry->cp.last = match.captured(QStringLiteral("cp2")).toUInt(&ok, 16); + if(!ok) { + _entry->cp.last = _entry->cp.first; + } + QString field0 = match.captured(QStringLiteral("field0")); + if(field0.isNull()) { + qDebug() << QStringLiteral("Line %d: Missing field0 - skipping").arg(_lineNo); + continue; + } + if(!match.captured(QStringLiteral("udRangeInd")).isNull()) { + if(match.captured(QStringLiteral("udRangeInd")) == QStringLiteral("First")) { + // Fetch next valid line, as it pairs with the current one to form a range + QRegularExpressionMatch nlMatch; + int firstLineNo = _lineNo; + while(!_source->atEnd() && !nlMatch.hasMatch()) { + line = QString::fromUtf8(_source->readLine()); + _lineNo++; + nlMatch = ENTRY_RE.match(line); + if(!nlMatch.hasMatch()) { + qDebug() << QStringLiteral("Line %d: does not match - skipping").arg(_lineNo); + } + } + if(nlMatch.hasMatch()) { + _entry->cp.last = nlMatch.captured(QStringLiteral("cp1")).toUInt(&ok, 16); + if(!ok) { + qDebug() << QStringLiteral("Line %1-%2: Missing or invalid second cp1 (\"Last\" entry) - skipping") + .arg(firstLineNo).arg(_lineNo); + continue; + } + } + } + field0.remove(UD_RANGE_IND_RE); + } + _entry->fields.append(field0); + + while(mit.hasNext()) { + _entry->fields.append(mit.next().captured(QStringLiteral("field"))); + } + + return !_source->atEnd(); + } + return false; + } + + QIODevice *_source; + bool _nextFetched; + bool _hasNext; + +private: + int _lineNo; + UcdEntry *_entry; +}; + +template +class UcdParser: public UcdParserBase { +public: + static_assert(std::is_base_of::value, "'EntryType' has to be derived 
from UcdParser::Entry"); + + UcdParser(QIODevice *source): UcdParserBase(source, &_typedEntry) {} + + inline const EntryType & next() { + if(!_nextFetched) + fetchNext(); + _nextFetched = false; + return _typedEntry; + } + +private: + EntryType _typedEntry; +}; + +class KIODevice: public QIODevice { +public: + enum Error { + NoError, + UnknownError, + TimeoutError, + UnknownHostError, + MalformedUrlError, + NotFoundError, + }; + + KIODevice(const QUrl &url) + : _url(url) + , _job(nullptr) + , _error(NoError) {} + + ~KIODevice() { + close(); + } + + bool open() { + if(_job) + return false; + + _job = KIO::storedGet(_url); + QObject::connect(_job, &KIO::StoredTransferJob::result, + _job, [&](KJob *) { + if(_job->isErrorPage()) + _eventLoop.exit(KIO::ERR_DOES_NOT_EXIST); + else if(_job->error() != KJob::NoError) + _eventLoop.exit(_job->error()); + else + _data = _job->data(); + + _eventLoop.exit(KJob::NoError); + }); + + _eventLoop.exec(); + switch(_job->error()) { + case KJob::NoError: + _error = NoError; + setErrorString(QStringLiteral("")); + QIODevice::open(QIODevice::ReadOnly | QIODevice::Unbuffered); + break; + case KJob::KilledJobError: _error = TimeoutError; break; + case KIO::ERR_UNKNOWN_HOST: _error = UnknownHostError; break; + case KIO::ERR_DOES_NOT_EXIST: _error = NotFoundError; break; + case KIO::ERR_MALFORMED_URL: _error = MalformedUrlError; break; + default: _error = UnknownError; break; + } + if(_error != NoError) { + setErrorString(QStringLiteral("KIO: ") + _job->errorString()); + delete _job; + _job = nullptr; + _data.clear(); + } + return _error == NoError; + } + bool open(OpenMode mode) override { + Q_ASSERT(mode == QIODevice::ReadOnly); + return open(); + } + void close() override { + if(_job) { + delete _job; + _job = nullptr; + _error = NoError; + setErrorString(QStringLiteral("")); + _data.clear(); + QIODevice::close(); + } + } + + qint64 size() const override { + return _data.size(); + } + + int error() const { return _error; } + void 
unsetError() { _error = NoError; } + +protected: + qint64 writeData(const char *, qint64) override { return -1; } + qint64 readData(char *data, qint64 maxSize) override { + Q_UNUSED(maxSize); + Q_ASSERT(_job); + Q_ASSERT(_job->error() == NoError); + Q_ASSERT(data != nullptr); + if(maxSize == 0 || pos() >= _data.length()) { + return 0; + } else if(pos() < _data.length()) { + qint64 bytesToCopy = qMin(maxSize, _data.length() - pos()); + memcpy(data, _data.data() + pos(), bytesToCopy); + return bytesToCopy; + } else { + return -1; + } + } + +private: + QUrl _url; + KIO::StoredTransferJob *_job; + Error _error; + QEventLoop _eventLoop; + QByteArray _data; +}; + + + +struct CategoryProperty { + enum Flag: uint32_t { + Invalid = 0, + #define CATEGORY_PROPERTY_VALUE(val, sym, intVal) sym = intVal, + #include "properties.h" + }; + enum Group: uint32_t { + #define CATEGORY_PROPERTY_GROUP(val, sym, intVal) sym = intVal, + #include "properties.h" + }; + + CategoryProperty(uint32_t value = Unassigned): _value(value) {} + CategoryProperty(const QString &string): _value(fromString(string)) {} + operator uint32_t &() { return _value; } + operator const uint32_t &() const { return _value; } + bool isValid() const { return _value != Invalid; } + +private: + static uint32_t fromString(const QString &string) { + static const QMap map = { + #define CATEGORY_PROPERTY_VALUE(val, sym, intVal) { QStringLiteral(#val), sym }, + #include "properties.h" + }; + return map.contains(string) ? 
map[string] : uint8_t(Invalid); + } + uint32_t _value; +}; + +struct EastAsianWidthProperty { + enum Value: uint8_t { + Invalid = 0x80, + #define EAST_ASIAN_WIDTH_PROPERTY_VALUE(val, sym, intVal) sym = intVal, + #include "properties.h" + }; + + EastAsianWidthProperty(uint8_t value = Neutral): _value(value) {} + EastAsianWidthProperty(const QString &string): _value(fromString(string)) {} + operator uint8_t &() { return _value; } + operator const uint8_t &() const { return _value; } + bool isValid() const { return _value != Invalid; } + +private: + static uint8_t fromString(const QString &string) { + static const QMap map = { + #define EAST_ASIAN_WIDTH_PROPERTY_VALUE(val, sym, intVal) { QStringLiteral(#val), Value::sym }, + #include "properties.h" + }; + return map.contains(string) ? map[string] : Invalid; + } + uint8_t _value; +}; + +struct EmojiProperty { + enum Flag: uint8_t { + Invalid = 0x80, + #define EMOJI_PROPERTY_VALUE(val, sym, intVal) sym = intVal, + #include "properties.h" + }; + + EmojiProperty(uint8_t value = None): _value(value) {} + EmojiProperty(const QString &string): _value(fromString(string)) {} + operator uint8_t &() { return _value; } + operator const uint8_t &() const { return _value; } + bool isValid() const { return !(_value & Invalid); } + +private: + static uint8_t fromString(const QString &string) { + static const QMap map = { + #define EMOJI_PROPERTY_VALUE(val, sym, intVal) { QStringLiteral(#val), sym }, + #include "properties.h" + }; + return map.contains(string) ? 
map[string] : uint8_t(Invalid); + } + uint8_t _value; +}; + + + +struct CharacterWidth { + enum Width: int8_t { + Invalid = SCHAR_MIN, + _VALID_START = -3, + Ambiguous = -2, + NonPrintable = -1, + // 0 + // 1 + Unassigned = 1, + // 2 + _VALID_END = 3, + }; + + CharacterWidth(const CharacterWidth &other): _width(other._width) {} + CharacterWidth(int8_t width = Invalid): _width(width) {} + CharacterWidth & operator =(const CharacterWidth &other) { _width = other._width; return *this; } + int operator =(const int8_t width) { _width = width; return _width; } + int width() const { return _width; } + operator int() const { return width(); } + + const QString toString() const { + switch(_width) { + case Ambiguous: return QStringLiteral("Ambiguous"); + case NonPrintable: return QStringLiteral("NonPrintable"); + case 0: return QStringLiteral("0"); + case 1: return QStringLiteral("1"); + case 2: return QStringLiteral("2"); + default: + case Invalid: return QStringLiteral("Invalid"); + } + } + + bool isValid() const { return (_width > _VALID_START && _width < _VALID_END); }; + +private: + int8_t _width; +}; + + + +struct CharacterProperties { + CategoryProperty category; + EastAsianWidthProperty eastAsianWidth; + EmojiProperty emoji; + CharacterWidth customWidth; + // For debug purposes in "details" output generator + uint8_t widthFromPropsRule; +}; + + + +struct UnicodeDataEntry: public UcdEntry { + enum FieldId { + NameId = 0, + CategoryId = 1, + }; + CategoryProperty category() const { return CategoryProperty(this->fields.value(CategoryId)); } +}; + +struct EastAsianWidthEntry: public UcdEntry { + enum FieldId { + WidthId = 0, + }; + EastAsianWidthProperty eastAsianWidth() const { return EastAsianWidthProperty(this->fields.value(WidthId)); } +}; + +struct EmojiDataEntry: public UcdEntry { + enum FieldId { + EmojiId = 0, + }; + EmojiProperty emoji() const { return EmojiProperty(this->fields.value(EmojiId)); } +}; + +struct GenericWidthEntry: public UcdEntry { + enum FieldId 
{ + WidthId = 0, + }; + CharacterWidth width() const { + bool ok; + CharacterWidth w = this->fields.value(WidthId).toInt(&ok, 10); + return (ok && w.isValid()) ? w : CharacterWidth::Invalid; + } +}; + +struct WidthsRange { + struct { uint first; uint last; } cp; + CharacterWidth width; +}; + +QVector rangesFromWidths(const QVector &widths, QPair ucsRange = {0, CODE_POINTS_NUM}) { + QVector ranges; + + if(ucsRange.second >= CODE_POINTS_NUM) + ucsRange.second = widths.size() - 1; + + uint first = ucsRange.first; + for(uint cp = first + 1; cp <= uint(ucsRange.second); ++cp) { + if(widths[first] != widths[cp]) { + ranges.append({{first, cp-1}, widths[cp-1]}); + first = cp; + } + } + ranges.append({{first, uint(ucsRange.second)}, widths[ucsRange.second]}); + + return ranges; +} + +// Real ranges look like this (each continuous letter sequence is a range): +// +// D D D D D D D D 8 ranges +// C C C C C C CC C CC 9 ranges +// BBB BBB B B BBB BBBBBB 6 ranges +// A A A A 4 ranges +// ∑: 27 ranges +// +// To reduce total ranges count, the holes in groups can be filled with ranges +// from groups above them: +// +// D D D D D D D D 8 ranges +// CCC C CCCCC CCCCCCC 4 ranges +// BBBBBBB BBBBBBB BBBBBBBBBBBBBBBB 3 ranges +// AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 1 ranges +// ∑: 16 ranges +// +// First range is always without change. Last range (A) can be dropped +// (it always contains everything). Search should be done in order: D, C, B (A). +// For simplicity the funtion returns all ranges, including first and last. 
+QMap>> mergedRangesFromWidths(const QVector &widths, const QVector widthsSortOrder, + QPair ucsRange = {0, CODE_POINTS_NUM}) { + if(ucsRange.second >= CODE_POINTS_NUM) + ucsRange.second = widths.size() - 1; + QVector ranges = rangesFromWidths(widths, ucsRange); + QMap>> mergedRanges; + + int cmwi; // Currently Merged Width Index + int sri = -1; // Start Range Index (for current width) + int cri; // Currrent Range Index + + // First width ranges are without change. Last one has one range spanning everything, so we can skip this + for(cmwi = 1; cmwi < widthsSortOrder.size() - 1; ++cmwi) { + const CharacterWidth &cmw = widthsSortOrder[cmwi]; // Currently Merged Width + for(cri = 0; cri < ranges.size(); ++cri) { + WidthsRange &cr = ranges[cri]; // Current Range + if(cr.width == cmw) { + // Range is suitable for merge + if(sri < 0) { + // First one, just remember it + sri = cri; + } else { + // Merge + ranges[sri].cp.last = cr.cp.last; + cr.width = CharacterWidth::Invalid; + } + } else { + // Current range has another width - can we continue merging? 
+ if(sri >= 0) { + const int crwi = widthsSortOrder.indexOf(cr.width); // Current Range Width Index + if(!(crwi < cmwi && crwi >= 0)) { + // current range is not above currently merged width - stop merging + sri = -1; + } + } + } + } + } + + for(const auto &range: qAsConst(ranges)) { + if(range.width.isValid() && range.width != widthsSortOrder.last()) + mergedRanges[range.width].append({range.cp.first, range.cp.last}); + } + mergedRanges[widthsSortOrder.last()].append({ucsRange.first, ucsRange.second}); + + return mergedRanges; +} + +namespace generators { + +using GeneratorFunc = bool (*)(QTextStream &, const QVector &, + const QVector &, const QMap &); + +bool code(QTextStream &out, const QVector &props, const QVector &widths, + const QMap &args) { + static constexpr int DIRECT_LUT_SIZE = 256; + + Q_UNUSED(props); + QTextStream eout(stderr, QIODevice::WriteOnly); + + if(args.value(QStringLiteral("param")).isEmpty()) { + eout << QStringLiteral("Template file not specified.") << endl << endl; + return false; + } + QFile templateFile(args.value(QStringLiteral("param"))); + if(!templateFile.open(QIODevice::ReadOnly)) { + eout << QStringLiteral("Could not open file ") << templateFile.fileName() << ": " << templateFile.errorString(); + exit(1); + } + + const QString templateText = QString::fromUtf8(templateFile.readAll()); + templateFile.close(); + + Var::Map data = { + {QStringLiteral("gen-file-warning"), QStringLiteral("THIS IS A GENERATED FILE. 
DO NOT EDIT.")}, + {QStringLiteral("cmdline"), args.value(QStringLiteral("cmdline"))}, + {QStringLiteral("direct-lut"), Var::Vector(DIRECT_LUT_SIZE)}, + {QStringLiteral("direct-lut-size"), DIRECT_LUT_SIZE}, + {QStringLiteral("ranges-luts"), Var::Vector()}, + {QStringLiteral("ranges-lut-list"), Var::Vector()}, + {QStringLiteral("ranges-lut-list-size"), 0}, + }; + + // Fill direct-lut with widths of 0x00-0xFF + for(unsigned i = 0; i < DIRECT_LUT_SIZE; ++i) { + Q_ASSERT(widths[i].isValid()); + data[QStringLiteral("direct-lut")].vec[i] = int(widths[i]); + } + + static const QVector widthsSortOrder = {CharacterWidth::NonPrintable, 2, CharacterWidth::Ambiguous, 0, 1}; + const QMap>> mergedRanges + = mergedRangesFromWidths(widths, widthsSortOrder, {DIRECT_LUT_SIZE, CODE_POINTS_NUM}); + + // Find last non-empty ranges lut + int lastWidthId = 0; + for(int wi = widthsSortOrder.size() - 1; wi > 0; --wi) { + if(mergedRanges.contains(widthsSortOrder[wi])) { + lastWidthId = wi; + break; + } + } + // Create ranges-luts for all widths except last non-empty one and empty ones + for(int wi = 0; lastWidthId != 0 && wi < lastWidthId; ++wi) { + const CharacterWidth width = widthsSortOrder[wi]; + auto currentMergedRangesIt = mergedRanges.find(width); + if(currentMergedRangesIt == mergedRanges.end() || currentMergedRangesIt.value().isEmpty()) + continue; + const int size = mergedRanges[width].size(); + const QString name = QString(QStringLiteral("LUT_%1")).arg(width.toString().toUpper()); + data[QStringLiteral("ranges-luts")].vec.append(Var::Map { + {QStringLiteral("name"), name}, + {QStringLiteral("ranges"), Var::Vector()}, + {QStringLiteral("size"), size}, + }); + data[QStringLiteral("ranges-lut-list")].vec.append(Var::Map { + {QStringLiteral("width"), int(width)}, + {QStringLiteral("name"), name}, + {QStringLiteral("size"), size}, + }); + auto ¤tLut = data[QStringLiteral("ranges-luts")].vec.last()[QStringLiteral("ranges")].vec; + for(const auto &range: *currentMergedRangesIt) { + 
Q_ASSERT(range.first <= LAST_CODE_POINT); + Q_ASSERT(range.second <= LAST_CODE_POINT); + currentLut.append(Var(Var::Map {{QStringLiteral("first"), range.first}, {QStringLiteral("last"), range.second}})); + } + } + data[QStringLiteral("ranges-lut-list")].vec.append(Var::Map { + {QStringLiteral("width"), widthsSortOrder[lastWidthId].width()}, + {QStringLiteral("name"), QStringLiteral("nullptr")}, + {QStringLiteral("size"), 1}, + }); + data[QStringLiteral("ranges-lut-list-size")] = mergedRanges.size(); + + Template t(templateText); + t.parse(); + out << t.generate(data); + + return true; +} + +bool list(QTextStream &out, const QVector &props, const QVector &widths, + const QMap &args) { + Q_UNUSED(props); + + out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); + for(uint cp = 1; cp <= LAST_CODE_POINT; ++cp) { + out << QString::asprintf("%06X ; %2d\n", cp, int(widths[cp])); + } + + return true; +} + +bool ranges(QTextStream &out, const QVector &props, const QVector &widths, + const QMap &args) { + Q_UNUSED(props); + const auto ranges = rangesFromWidths(widths); + + out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); + for(const WidthsRange &range: ranges) { + if(range.cp.first != range.cp.last) + out << QString::asprintf("%06X..%06X ; %2d\n", range.cp.first, range.cp.last, int(range.width)); + else + out << QString::asprintf("%06X ; %2d\n", range.cp.first, int(range.width)); + } + + return true; +} + +bool compactRanges(QTextStream &out, const QVector &props, const QVector &widths, + const QMap &args) { + Q_UNUSED(props); + static const QVector widthsSortOrder = {CharacterWidth::NonPrintable, 2, CharacterWidth::Ambiguous, 0, 1}; + const auto mergedRanges = mergedRangesFromWidths(widths, widthsSortOrder); + + out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); + for(const int width: 
qAsConst(widthsSortOrder)) { + const auto currentMergedRangesIt = mergedRanges.find(width); + if(currentMergedRangesIt == mergedRanges.end() || currentMergedRangesIt.value().isEmpty()) + continue; + for(const auto &range: currentMergedRangesIt.value()) { + if(range.first != range.second) + out << QString::asprintf("%06X..%06X ; %2d\n", range.first, range.second, int(width)); + else + out << QString::asprintf("%06X ; %2d\n", range.first, int(width)); + } + } + + return true; +} + +bool details(QTextStream &out, const QVector &props, const QVector &widths, + const QMap &args) { + out.setFieldAlignment(QTextStream::AlignLeft); + + out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); + out << QString::asprintf("#%-5s ; %-4s ; %-8s ; %-3s ; %-2s ; %-4s ; %-4s\n", + "CP", "Wdth", "Cat", "EAW", "EM", "CstW", "Rule"); + QMap widthStats; + for(uint cp = 0; cp <= LAST_CODE_POINT; ++cp) { + out << QString::asprintf("%06X ; %4d ; %08X ; %02X ; %02X ; %4d ; %d\n", cp, + int8_t(widths[cp]), uint32_t(props[cp].category), uint8_t(props[cp].eastAsianWidth), + uint8_t(props[cp].emoji), int8_t(props[cp].customWidth), props[cp].widthFromPropsRule); + if(!widthStats.contains(widths[cp])) + widthStats.insert(widths[cp], 0); + widthStats[widths[cp]]++; + } + QMap rangesStats; + const auto ranges = rangesFromWidths(widths); + for(const auto &range: ranges) { + if(!rangesStats.contains(range.width)) + rangesStats.insert(range.width, 0); + rangesStats[range.width]++; + } + out << QStringLiteral("# STATS") << endl; + out << QStringLiteral("#") << endl; + out << QStringLiteral("# Characters count for each width:") << endl; + for(auto wi = widthStats.constBegin(); wi != widthStats.constEnd(); ++wi) { + out << QString::asprintf("# %2d: %7d\n", int(wi.key()), widthStats[wi.key()]); + } + out << QStringLiteral("#") << endl; + out << QStringLiteral("# Ranges count for each width:") << endl; + int howmany = 0; + for(auto wi = 
rangesStats.constBegin(); wi != rangesStats.constEnd(); ++wi) { + if(howmany >= 20) break; + howmany++; + out << QString::asprintf("# %2d: %7d\n", int(wi.key()), rangesStats[wi.key()]); + } + + return true; +} +} // namespace generators + + + +template +static void processInputFiles(QVector &props, const QStringList &files, const QString &fileTypeName, + void (*cb)(CharacterProperties &prop, const EntryType &entry)) { + static const QRegularExpression PROTOCOL_RE(QStringLiteral(R"#(^[a-z]+://)#"), QRegularExpression::OptimizeOnFirstUsageOption); + for(const QString &fileName: files) { + qInfo().noquote() << QStringLiteral("Parsing as %1: %2").arg(fileTypeName).arg(fileName); + QSharedPointer source = nullptr; + if(PROTOCOL_RE.match(fileName).hasMatch()) { + source.reset(new KIODevice(QUrl(fileName))); + } else { + source.reset(new QFile(fileName)); + } + + if(!source->open(QIODevice::ReadOnly)) { + qCritical() << QStringLiteral("Could not open %1: %2").arg(fileName).arg(source->errorString()); + exit(1); + } + UcdParser p(source.data()); + while(p.hasNext()) { + const auto &e = p.next(); + for(uint cp = e.cp.first; cp <= e.cp.last; ++cp) { + cb(props[cp], e); + } + } + } +} + +static const QString escapeCmdline(const QStringList &args) { + static QString cmdline = QString(); + if(!cmdline.isEmpty()) + return cmdline; + + QTextStream stream(&cmdline, QIODevice::WriteOnly); + + // basename for command name + stream << QFileInfo(args[0]).baseName(); + for(auto it = args.begin() + 1; it != args.end(); ++it) { + if(!it->startsWith(QLatin1Char('-'))) + stream << QStringLiteral(" \"") << QString(*it).replace(QRegularExpression(QStringLiteral(R"(["`$\\])")), QStringLiteral(R"(\\\1)")) << '"'; + else + stream << ' ' << *it; + } + stream.flush(); + return cmdline; +} + +enum ConvertOptions { + AmbiguousWidthOpt = 0, + EmojiOpt = 1, +}; + +// Character width assignment +// +// Rules (from highest to lowest priority): +// +// * Local overlay +// * (not implemented) Character 
unique properties described in The Unicode Standard, Version 10.0 +// * Unicode category Cc, Cs: -1 +// * Emoji: 2 +// * Unicode category Mn, Me, Cf: 0 +// * East Asian Width W, F: 2 +// * East Asian Width H, N, Na: 1 +// * East Asian Width A: (varies) +// * Unassigned/Undefined/Private Use: 1 +// +// The list is loosely based on character width implementations in Vim 8.1 +// and glibc 2.27. There are a few cases which could look better +// (decomposed Hangul, emoji with modifiers, etc) with different widths, +// but interactive terminal programs (at least vim, zsh, everything based +// on glibc's wcwidth) would see their width as it is implemented now. +static inline CharacterWidth widthFromProps(const CharacterProperties &props, uint cp, const QMap &convertOpts) { + CharacterWidth cw; + auto &widthFromPropsRule = const_cast(props.widthFromPropsRule); + if(props.customWidth.isValid()) { + widthFromPropsRule = 1; + cw = props.customWidth; + + } else if((CategoryProperty::Control | CategoryProperty::Surrogate) & props.category) { + widthFromPropsRule = 2; + cw = CharacterWidth::NonPrintable; + + } else if(convertOpts[EmojiOpt] & props.emoji && !(EmojiProperty::EmojiComponent & props.emoji)) { + widthFromPropsRule = 3; + cw = 2; + + } else if((CategoryProperty::NonspacingMark | CategoryProperty::EnclosingMark | CategoryProperty::Format) & props.category) { + widthFromPropsRule = 4; + cw = 0; + + } else if((EastAsianWidthProperty::Wide | EastAsianWidthProperty::Fullwidth) & props.eastAsianWidth) { + widthFromPropsRule = 5; + cw = 2; + + } else if((EastAsianWidthProperty::Halfwidth | EastAsianWidthProperty::Neutral | EastAsianWidthProperty::Narrow) & props.eastAsianWidth) { + widthFromPropsRule = 6; + cw = 1; + + } else if((CategoryProperty::Unassigned | CategoryProperty::PrivateUse) & props.category) { + widthFromPropsRule = 7; + cw = CharacterWidth::Unassigned; + + } else if((EastAsianWidthProperty::Ambiguous) & props.eastAsianWidth) { + widthFromPropsRule = 8; + cw 
= convertOpts[AmbiguousWidthOpt]; + + } else if(!props.category.isValid()) { + widthFromPropsRule = 9; + qWarning() << QStringLiteral("Code point U+%1 has invalid category - this should not happen. Assuming \"unassigned\"") + .arg(cp, 4, 16, QLatin1Char('0')); + cw = CharacterWidth::Unassigned; + + } else { + widthFromPropsRule = 10; + qWarning() << QStringLiteral("Code point U+%1 not classified - this should not happen. Assuming non-printable character") + .arg(cp, 4, 16, QLatin1Char('0')); + cw = CharacterWidth::NonPrintable; + } + + return cw; +} + +int main(int argc, char *argv[]) { + static const QMap GENERATOR_FUNCS_MAP = { + {QStringLiteral("code"), generators::code}, + {QStringLiteral("compact-ranges"), generators::compactRanges}, + {QStringLiteral("ranges"), generators::ranges}, + {QStringLiteral("list"), generators::list}, + {QStringLiteral("details"), generators::details}, + {QStringLiteral("dummy"), [](QTextStream &, const QVector &, const QVector &, + const QMap &)->bool {return true;}}, + }; + qSetMessagePattern(QStringLiteral("%{message}")); + + QCoreApplication app(argc, argv); + QCommandLineParser parser; + parser.setApplicationDescription( + QStringLiteral("\nUCD files to characters widths converter.\n") + ); + parser.addHelpOption(); + parser.addOptions({ + {{QStringLiteral("U"), QStringLiteral("unicode-data")}, + QStringLiteral("Path or URL to UnicodeData.txt."), + QStringLiteral("URL|file")}, + {{QStringLiteral("A"), QStringLiteral("east-asian-width")}, + QStringLiteral("Path or URL to EastAsianWidth.txt."), + QStringLiteral("URL|file")}, + {{QStringLiteral("E"), QStringLiteral("emoji-data")}, + QStringLiteral("Path or URL to emoji-data.txt."), + QStringLiteral("URL|file")}, + {{QStringLiteral("W"), QStringLiteral("generic-width")}, + QStringLiteral("Path or URL to generic file with width data. 
Accepts output from compact-ranges, ranges, list and details generator."), + QStringLiteral("URL|file")}, + + {QStringLiteral("ambiguous-width"), + QStringLiteral("Ambiguous characters width."), + QStringLiteral("separate|1|2"), QString(QStringLiteral("%1")).arg(CharacterWidth::Ambiguous)}, + {QStringLiteral("emoji"), + QStringLiteral("Which emoji emoji subset is treated as emoji."), + QStringLiteral("all|presentation"), QStringLiteral("presentation")}, + + {{QStringLiteral("g"), QStringLiteral("generator")}, + QStringLiteral("Output generator (use \"-\" to list available generators). The code generator requires path to a template file."), + QStringLiteral("generator[:template]"), QStringLiteral("details")}, + }); + parser.addPositionalArgument(QStringLiteral("output"), QStringLiteral("Output file (leave empty for stdout).")); + parser.process(app); + + const QStringList unicodeDataFiles = parser.values(QStringLiteral("unicode-data")); + const QStringList eastAsianWidthFiles = parser.values(QStringLiteral("east-asian-width")); + const QStringList emojiDataFiles = parser.values(QStringLiteral("emoji-data")); + const QStringList genericWidthFiles = parser.values(QStringLiteral("generic-width")); + const QString ambiguousWidthStr = parser.value(QStringLiteral("ambiguous-width")); + const QString emojiStr = parser.value(QStringLiteral("emoji")); + const QString generator = parser.value(QStringLiteral("generator")); + const QString outputFileName = parser.positionalArguments().value(0); + + QTextStream eout(stderr, QIODevice::WriteOnly); + if(unicodeDataFiles.isEmpty() && eastAsianWidthFiles.isEmpty() && emojiDataFiles.isEmpty() && genericWidthFiles.isEmpty()) { + eout << QStringLiteral("Input files not specified.") << endl << endl; + parser.showHelp(1); + } + + static QMap convertOpts = { + {AmbiguousWidthOpt, CharacterWidth::Ambiguous}, + {EmojiOpt, EmojiProperty::EmojiPresentation}, + }; + + if(emojiStr == QStringLiteral("presentation")) + convertOpts[EmojiOpt] = 
EmojiProperty::EmojiPresentation; + else if(emojiStr == QStringLiteral("all")) + convertOpts[EmojiOpt] = EmojiProperty::Emoji; + else { + convertOpts[EmojiOpt] = EmojiProperty::EmojiPresentation; + qWarning() << QStringLiteral("invalid emoji option value: %1. Assuming \"presentation\".").arg(emojiStr); + } + + if(ambiguousWidthStr == QStringLiteral("separate")) + convertOpts[AmbiguousWidthOpt] = CharacterWidth::Ambiguous; + else if(ambiguousWidthStr == QStringLiteral("1")) + convertOpts[AmbiguousWidthOpt] = 1; + else if(ambiguousWidthStr == QStringLiteral("2")) + convertOpts[AmbiguousWidthOpt] = 2; + else { + convertOpts[AmbiguousWidthOpt] = CharacterWidth::Ambiguous; + qWarning() << QStringLiteral("Invalid ambiguous-width option value: %1. Assuming \"separate\".").arg(emojiStr); + } + + const int sepPos = generator.indexOf(QLatin1Char(':')); + const auto generatorName = generator.left(sepPos); + const auto generatorParam = sepPos >= 0 ? generator.mid(sepPos + 1) : QString(); + + if(!GENERATOR_FUNCS_MAP.contains(generatorName)) { + int status = 0; + if(generatorName != QStringLiteral("-")) { + status = 1; + eout << QStringLiteral("Invalid output generator. 
Available generators:") << endl; + } + + for(auto it = GENERATOR_FUNCS_MAP.constBegin(); it != GENERATOR_FUNCS_MAP.constEnd(); ++it) { + eout << it.key() << endl; + } + exit(status); + } + auto generatorFunc = GENERATOR_FUNCS_MAP[generatorName]; + + QFile outFile; + if(!outputFileName.isEmpty()) { + outFile.setFileName(outputFileName); + if(!outFile.open(QIODevice::WriteOnly)) { + eout << QStringLiteral("Could not open file ") << outputFileName << QStringLiteral(": ") << outFile.errorString() << endl; + exit(1); + } + } else { + outFile.open(stdout, QIODevice::WriteOnly); + } + QTextStream out(&outFile); + + QVector props(CODE_POINTS_NUM); + + processInputFiles( + props, unicodeDataFiles, QStringLiteral("UnicodeData.txt"), + [](CharacterProperties &prop, const UnicodeDataEntry &entry) { prop.category = entry.category(); }); + + processInputFiles( + props, eastAsianWidthFiles, QStringLiteral("EastAsianWidth.txt"), + [](CharacterProperties &prop, const EastAsianWidthEntry &entry) { prop.eastAsianWidth = entry.eastAsianWidth(); }); + + processInputFiles( + props, emojiDataFiles, QStringLiteral("emoji-data.txt"), + [](CharacterProperties &prop, const EmojiDataEntry &entry) { prop.emoji |= entry.emoji(); }); + + processInputFiles( + props, genericWidthFiles, QStringLiteral("generic width data"), + [](CharacterProperties &prop, const GenericWidthEntry &entry) { prop.customWidth = entry.width(); }); + + qInfo() << "Generating character width data"; + QVector widths(CODE_POINTS_NUM); + widths[0] = 0; // NULL character always has width 0 + for(uint cp = 1; cp <= LAST_CODE_POINT; ++cp) { + widths[cp] = widthFromProps(props[cp], cp, convertOpts); + } + + const QMap generatorArgs = { + {QStringLiteral("cmdline"), escapeCmdline(app.arguments())}, + {QStringLiteral("param"), generatorParam}, + {QStringLiteral("output"), outputFileName.isEmpty() ? 
QStringLiteral("") : outputFileName}, + }; + + qInfo() << "Generating output"; + if(!generatorFunc(out, props, widths, generatorArgs)) { + parser.showHelp(1); + } + + return 0; +}