diff --git a/autotests/kstringhandlertest.cpp b/autotests/kstringhandlertest.cpp index d0f6a26..9555a93 100644 --- a/autotests/kstringhandlertest.cpp +++ b/autotests/kstringhandlertest.cpp @@ -1,193 +1,196 @@ #include "kstringhandlertest.h" +#include #include QTEST_MAIN(KStringHandlerTest) #include "kstringhandler.h" QString KStringHandlerTest::test = QStringLiteral("The quick brown fox jumped over the lazy bridge. "); void KStringHandlerTest::capwords() { QCOMPARE(KStringHandler::capwords(test), QStringLiteral("The Quick Brown Fox Jumped Over The Lazy Bridge. ")); } void KStringHandlerTest::tagURLs() { QString test = QStringLiteral("Click on https://foo@bar:www.kde.org/yoyo/dyne.html#a1 for info."); QCOMPARE(KStringHandler::tagUrls(test), QStringLiteral("Click on https://foo@bar:www.kde.org/yoyo/dyne.html#a1 for info.")); test = QStringLiteral("http://www.foo.org/story$806"); QCOMPARE(KStringHandler::tagUrls(test), QStringLiteral("http://www.foo.org/story$806")); #if 0 // XFAIL - i.e. this needs to be fixed, but has never been test = "<a href=www.foo.com>"; check("tagURLs()", KStringHandler::tagURLs(test), "<a href=www.foo.com>"); #endif test = QStringLiteral("http://www.foo.org/bla-(bli)"); QCOMPARE(KStringHandler::tagUrls(test), QStringLiteral("http://www.foo.org/bla-(bli)")); test = QStringLiteral("http://www.foo.org/bla-bli"); QCOMPARE(KStringHandler::tagUrls(test), QStringLiteral("http://www.foo.org/bla-bli")); } void KStringHandlerTest::perlSplit() { QStringList expected; expected << QStringLiteral("some") << QStringLiteral("string") << QStringLiteral("for") << QStringLiteral("you__here"); QCOMPARE(KStringHandler::perlSplit(QStringLiteral("__"), QStringLiteral("some__string__for__you__here"), 4), expected); expected.clear(); expected << QStringLiteral("kparts") << QStringLiteral("reaches") << QStringLiteral("the parts other parts can't"); QCOMPARE(KStringHandler::perlSplit(QLatin1Char(' '), QStringLiteral("kparts reaches the parts other parts can't"), 3), expected); expected.clear(); expected << QStringLiteral("Split") << QStringLiteral("me") << QStringLiteral("up ! I'm bored ! OK ?"); QCOMPARE(KStringHandler::perlSplit(QRegExp(QStringLiteral("[! ]")), QStringLiteral("Split me up ! I'm bored ! OK ?"), 3), expected); + QCOMPARE(KStringHandler::perlSplit(QRegularExpression(QStringLiteral("[! ]")), + QStringLiteral("Split me up ! I'm bored ! OK ?"), 3), expected); } void KStringHandlerTest::obscure() { // See bug 167900, obscure() produced chars that could not properly be converted to and from // UTF8. The result was that storing passwords with '!' in them did not work. QString test = QStringLiteral("!TEST!"); QString obscured = KStringHandler::obscure(test); QByteArray obscuredBytes = obscured.toUtf8(); QCOMPARE(KStringHandler::obscure(QString::fromUtf8(obscuredBytes.constData())), test); } void KStringHandlerTest::preProcessWrap_data() { const QChar zwsp(0x200b); QTest::addColumn("string"); QTest::addColumn("expected"); // Should result in no additional breaks QTest::newRow("spaces") << "foo bar baz" << "foo bar baz"; // Should insert a ZWSP after each '_' QTest::newRow("underscores") << "foo_bar_baz" << QString(QStringLiteral("foo_") + zwsp + QStringLiteral("bar_") + zwsp + QStringLiteral("baz")); // Should insert a ZWSP after each '-' QTest::newRow("hyphens") << "foo-bar-baz" << QString(QStringLiteral("foo-") + zwsp + QStringLiteral("bar-") + zwsp + QStringLiteral("baz")); // Should insert a ZWSP after each '.' QTest::newRow("periods") << "foo.bar.baz" << QString(QStringLiteral("foo.") + zwsp + QStringLiteral("bar.") + zwsp + QStringLiteral("baz")); // Should insert a ZWSP after each ',' QTest::newRow("commas") << "foo,bar,baz" << QString(QStringLiteral("foo,") + zwsp + QStringLiteral("bar,") + zwsp + QStringLiteral("baz")); // Should result in no additional breaks since the '_'s are followed by spaces QTest::newRow("mixed underscores and spaces") << "foo_ bar_ baz" << "foo_ bar_ baz"; // Should result in no additional breaks since the '_' is the last char QTest::newRow("ends with underscore") << "foo_" << "foo_"; // Should insert a ZWSP before '(' and after ')' QTest::newRow("parens") << "foo(bar)baz" << QString(QStringLiteral("foo") + zwsp + QStringLiteral("(bar)") + zwsp + QStringLiteral("baz")); // Should insert a ZWSP before '[' and after ']' QTest::newRow("brackets") << "foo[bar]baz" << QString(QStringLiteral("foo") + zwsp + QStringLiteral("[bar]") + zwsp + QStringLiteral("baz")); // Should insert a ZWSP before '{' and after '}' QTest::newRow("curly braces") << "foo{bar}baz" << QString(QStringLiteral("foo") + zwsp + QStringLiteral("{bar}") + zwsp + QStringLiteral("baz")); // Should insert a ZWSP before '(' but not after ')' since it's the last char QTest::newRow("ends with ')'") << "foo(bar)" << QString(QStringLiteral("foo") + zwsp + QStringLiteral("(bar)")); // Should insert a single ZWSP between the '_' and the '(' QTest::newRow("'_' followed by '('") << "foo_(bar)" << QString(QStringLiteral("foo_") + zwsp + QStringLiteral("(bar)")); // Should insert ZWSP's between the '_' and the '[', between the double // '['s and the double ']'s, but not before and after 'bar' QTest::newRow("'_' before double brackets") << "foo_[[bar]]" << QString(QStringLiteral("foo_") + zwsp + QStringLiteral("[") + zwsp + QStringLiteral("[bar]") + zwsp + QStringLiteral("]")); // Should only insert ZWSP's between the double '['s and the double ']'s QTest::newRow("space before double brackets") << "foo [[bar]]" << QString(QStringLiteral("foo [") + zwsp + QStringLiteral("[bar]") + zwsp + QStringLiteral("]")); // Shouldn't result in any additional breaks since the '(' is preceded // by a space, and the ')' is followed by a space. QTest::newRow("parens with spaces") << "foo (bar) baz" << "foo (bar) baz"; // Should insert a WJ (Word Joiner) before a single quote const QChar wj(0x2060); QTest::newRow("single quote") << "foo'bar" << QString(QStringLiteral("foo") + QString(wj) + QStringLiteral("'bar")); } static QString replaceZwsp(const QString &string) { const QChar zwsp(0x200b); QString result; for (int i = 0; i < string.length(); i++) if (string[i] == zwsp) { result += QStringLiteral(""); } else { result += string[i]; } return result; } void KStringHandlerTest::preProcessWrap() { QFETCH(QString, string); QFETCH(QString, expected); QCOMPARE(replaceZwsp(KStringHandler::preProcessWrap(string)), replaceZwsp(expected)); } void KStringHandlerTest::logicalLength_data() { QTest::addColumn("string"); QTest::addColumn("expected"); QTest::newRow("Latin") << "foo bar baz" << 11; QTest::newRow("Chinese") << QString::fromUtf8("\xe4\xbd\xa0\xe5\xa5\xbd") << 4; QTest::newRow("Japanese") << QString::fromUtf8("\xe9\x9d\x92\xe3\x81\x84\xe7\xa9\xba") << 6; QTest::newRow("Korean") << QString::fromUtf8("\xed\x95\x9c\xea\xb5\xad\xec\x96\xb4") << 6; QTest::newRow("Mixed") << QString::fromUtf8("KDE\xe6\xa1\x8c\xe9\x9d\xa2") << 7; } void KStringHandlerTest::logicalLength() { QFETCH(QString, string); QFETCH(int, expected); QCOMPARE(KStringHandler::logicalLength(string), expected); } diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index cf9b94c..aa12af4 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -1,332 +1,332 @@ # Configure checks for the caching subdir include(CheckIncludeFiles) check_include_files("sys/types.h;sys/mman.h" HAVE_SYS_MMAN_H) configure_file(caching/config-caching.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config-caching.h) include(CheckSymbolExists) check_symbol_exists("getgrouplist" "grp.h" HAVE_GETGROUPLIST) configure_file(util/config-getgrouplist.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config-getgrouplist.h) set (KDE4_DEFAULT_HOME ".kde${_KDE4_DEFAULT_HOME_POSTFIX}" CACHE STRING "The default KDE home directory" ) configure_file(util/config-kde4home.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config-kde4home.h) set (ACCOUNTS_SERVICE_ICON_DIR "/var/lib/AccountsService/icons" CACHE STRING "Accounts Services icon storage directory") configure_file(util/config-accountsservice.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config-accountsservice.h) ecm_create_qm_loader(kcoreaddons_QM_LOADER kcoreaddons5_qt) set(kcoreaddons_OPTIONAL_SRCS ) set(kcoreaddons_OPTIONAL_LIBS ) if (FAM_FOUND) include_directories(${FAM_INCLUDE_DIR}) set(kcoreaddons_OPTIONAL_LIBS ${kcoreaddons_OPTIONAL_LIBS} ${FAM_LIBRARIES}) endif () if (Inotify_FOUND) include_directories(${Inotify_INCLUDE_DIRS}) set(kcoreaddons_OPTIONAL_LIBS ${kcoreaddons_OPTIONAL_LIBS} ${Inotify_LIBRARIES}) endif () if(NOT WIN32) set(kcoreaddons_OPTIONAL_SRCS caching/kshareddatacache.cpp) set(kcoreaddons_OPTIONAL_LIBS ${kcoreaddons_OPTIONAL_LIBS} ${CMAKE_THREAD_LIBS_INIT}) set_source_files_properties(caching/kshareddatacache.cpp PROPERTIES COMPILE_FLAGS -fexceptions) else() set(kcoreaddons_OPTIONAL_SRCS caching/kshareddatacache_win.cpp ) endif() if (WIN32) set(kcoreaddons_OPTIONAL_SRCS ${kcoreaddons_OPTIONAL_SRCS} text/kmacroexpander_win.cpp util/klistopenfilesjob_win.cpp util/kprocesslist_win.cpp util/kshell_win.cpp util/kuser_win.cpp ) endif () if (UNIX) set(kcoreaddons_OPTIONAL_SRCS ${kcoreaddons_OPTIONAL_SRCS} text/kmacroexpander_unix.cpp util/klistopenfilesjob_unix.cpp util/kuser_unix.cpp util/kshell_unix.cpp ) if (HAVE_PROCSTAT) set(kcoreaddons_OPTIONAL_SRCS ${kcoreaddons_OPTIONAL_SRCS} util/kprocesslist_unix_procstat.cpp ) else () set(kcoreaddons_OPTIONAL_SRCS ${kcoreaddons_OPTIONAL_SRCS} util/kprocesslist_unix.cpp ) endif () endif () set(libkcoreaddons_SRCS kaboutdata.cpp kcoreaddons.cpp io/kautosavefile.cpp io/kdirwatch.cpp io/kfilesystemtype.cpp io/kmessage.cpp io/kprocess.cpp io/kbackup.cpp io/kurlmimedata.cpp io/kfileutils.cpp jobs/kcompositejob.cpp jobs/kjob.cpp jobs/kjobtrackerinterface.cpp jobs/kjobuidelegate.cpp plugin/kpluginfactory.cpp plugin/kpluginloader.cpp plugin/kpluginmetadata.cpp plugin/desktopfileparser.cpp randomness/krandom.cpp randomness/krandomsequence.cpp text/kmacroexpander.cpp text/kstringhandler.cpp text/ktexttohtml.cpp util/kdelibs4migration.cpp util/kdelibs4configmigrator.cpp util/kformat.cpp util/kformatprivate.cpp util/kosrelease.cpp util/kprocesslist.cpp util/kshell.cpp ${kcoreaddons_OPTIONAL_SRCS} ${kcoreaddons_QM_LOADER} ) set(kcoreaddons_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/../.. # for kcoreaddons_version.h ${CMAKE_CURRENT_SOURCE_DIR}/caching/ ${CMAKE_CURRENT_BINARY_DIR}/io/ ${CMAKE_CURRENT_SOURCE_DIR}/io/ ${CMAKE_CURRENT_SOURCE_DIR}/jobs/ ${CMAKE_CURRENT_SOURCE_DIR}/plugin/ ${CMAKE_CURRENT_SOURCE_DIR}/randomness/ ${CMAKE_CURRENT_SOURCE_DIR}/text/ ${CMAKE_CURRENT_SOURCE_DIR}/util/ ) if (HAVE_PROCSTAT) set(kcoreaddons_INCLUDE_DIRS ${kcoreaddons_INCLUDE_DIRS} ${PROCSTAT_INCLUDE_DIR} ) endif() ecm_qt_declare_logging_category(libkcoreaddons_SRCS HEADER kcoreaddons_debug.h IDENTIFIER KCOREADDONS_DEBUG CATEGORY_NAME org.kde.kcoreaddons) add_library(KF5CoreAddons ${libkcoreaddons_SRCS}) add_library(KF5::CoreAddons ALIAS KF5CoreAddons) ecm_generate_export_header(KF5CoreAddons BASE_NAME KCoreAddons GROUP_BASE_NAME KF VERSION ${KF5_VERSION} DEPRECATED_BASE_VERSION 0 - DEPRECATION_VERSIONS 4.0 5.0 5.2 5.65 + DEPRECATION_VERSIONS 4.0 5.0 5.2 5.65 5.67 EXCLUDE_DEPRECATED_BEFORE_AND_AT ${EXCLUDE_DEPRECATED_BEFORE_AND_AT} ) target_include_directories(KF5CoreAddons PUBLIC "$") target_link_libraries(KF5CoreAddons PUBLIC Qt5::Core PRIVATE ${kcoreaddons_OPTIONAL_LIBS} ) target_link_libraries(KF5CoreAddons PRIVATE ${CMAKE_THREAD_LIBS_INIT}) if(WIN32) target_link_libraries(KF5CoreAddons PRIVATE netapi32 userenv) endif() if(HAVE_PROCSTAT) target_link_libraries(KF5CoreAddons PRIVATE ${PROCSTAT_LIBRARIES}) endif() target_include_directories(KF5CoreAddons INTERFACE "$" ) target_compile_definitions(KF5CoreAddons INTERFACE "$") set_target_properties(KF5CoreAddons PROPERTIES VERSION ${KCOREADDONS_VERSION_STRING} SOVERSION ${KCOREADDONS_SOVERSION} EXPORT_NAME CoreAddons ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KAboutData KCoreAddons REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KSharedDataCache RELATIVE caching REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KAutoSaveFile KDirWatch KMessage KProcess KBackup KUrlMimeData KFileSystemType KFileUtils RELATIVE io REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KCompositeJob KJob KJobTrackerInterface KJobUiDelegate RELATIVE jobs REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KExportPlugin KPluginFactory KPluginLoader KPluginMetaData RELATIVE plugin REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KRandom KRandomSequence RELATIVE randomness REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KMacroExpander KStringHandler KTextToHTML KTextToHTMLEmoticonsInterface RELATIVE text REQUIRED_HEADERS KCoreAddons_HEADERS ) ecm_generate_headers(KCoreAddons_HEADERS HEADER_NAMES KFormat KOSRelease KUser KShell KProcessList KListOpenFilesJob Kdelibs4Migration Kdelibs4ConfigMigrator RELATIVE util REQUIRED_HEADERS KCoreAddons_HEADERS ) find_package(PythonModuleGeneration) if (PythonModuleGeneration_FOUND) ecm_generate_python_binding( TARGET KF5::CoreAddons PYTHONNAMESPACE PyKF5 MODULENAME KCoreAddons RULES_FILE "${CMAKE_SOURCE_DIR}/cmake/rules_PyKF5.py" SIP_DEPENDS QtCore/QtCoremod.sip HEADERS kaboutdata.h kcoreaddons.h caching/kshareddatacache.h io/kautosavefile.h io/kdirwatch.h io/kmessage.h io/kprocess.h io/kbackup.h io/kurlmimedata.h io/kfilesystemtype.h jobs/kcompositejob.h jobs/kjob.h jobs/kjobtrackerinterface.h jobs/kjobuidelegate.h plugin/kexportplugin.h plugin/kpluginfactory.h plugin/kpluginloader.h plugin/kpluginmetadata.h randomness/krandom.h randomness/krandomsequence.h text/kmacroexpander.h text/kstringhandler.h text/ktexttohtml.h text/ktexttohtmlemoticonsinterface.h util/kformat.h util/klistopenfilesjob.h util/kosrelease.h util/kprocesslist.h util/kuser.h util/kshell.h util/kdelibs4migration.h util/kdelibs4configmigrator.h ) endif() install(TARGETS KF5CoreAddons EXPORT KF5CoreAddonsTargets ${KF5_INSTALL_TARGETS_DEFAULT_ARGS}) install(FILES ${KCoreAddons_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/kcoreaddons_export.h DESTINATION ${KDE_INSTALL_INCLUDEDIR_KF5}/KCoreAddons COMPONENT Devel ) # Includes CMake code to install open-source license texts for KAboutData. add_subdirectory(licenses) if(BUILD_QCH) ecm_add_qch( KF5CoreAddons_QCH NAME KCoreAddons BASE_NAME KF5CoreAddons VERSION ${KF5_VERSION} ORG_DOMAIN org.kde SOURCES # using only public headers, to cover only public API ${KCoreAddons_HEADERS} MD_MAINPAGE "${CMAKE_SOURCE_DIR}/README.md" LINK_QCHS Qt5Core_QCH INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} ${kcoreaddons_INCLUDE_DIRS} BLANK_MACROS KCOREADDONS_EXPORT KCOREADDONS_DEPRECATED KCOREADDONS_DEPRECATED_EXPORT "KCOREADDONS_DEPRECATED_VERSION(x, y, t)" TAGFILE_INSTALL_DESTINATION ${KDE_INSTALL_QTQCHDIR} QCH_INSTALL_DESTINATION ${KDE_INSTALL_QTQCHDIR} COMPONENT Devel ) endif() include(ECMGeneratePriFile) ecm_generate_pri_file(BASE_NAME KCoreAddons LIB_NAME KF5CoreAddons DEPS "core" FILENAME_VAR PRI_FILENAME INCLUDE_INSTALL_DIR ${KDE_INSTALL_INCLUDEDIR_KF5}/KCoreAddons) install(FILES ${PRI_FILENAME} DESTINATION ${ECM_MKSPECS_INSTALL_DIR}) diff --git a/src/lib/text/kstringhandler.cpp b/src/lib/text/kstringhandler.cpp index a27892d..3641df9 100644 --- a/src/lib/text/kstringhandler.cpp +++ b/src/lib/text/kstringhandler.cpp @@ -1,350 +1,390 @@ /* This file is part of the KDE libraries Copyright (C) 1999 Ian Zepp (icszepp@islc.net) Copyright (C) 2006 by Dominic Battre Copyright (C) 2006 by Martin Pool This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "kstringhandler.h" #include // random() #include // for the word ranges #include #include #include #include // // Capitalization routines // QString KStringHandler::capwords(const QString &text) { if (text.isEmpty()) { return text; } const QString strippedText = text.trimmed(); const QString space = QString(QLatin1Char(' ')); const QStringList words = capwords(strippedText.split(space)); QString result = text; result.replace(strippedText, words.join(space)); return result; } QStringList KStringHandler::capwords(const QStringList &list) { QStringList tmp = list; for (QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it) { *it = (*it)[ 0 ].toUpper() + (*it).midRef(1); } return tmp; } QString KStringHandler::lsqueeze(const QString &str, int maxlen) { if (str.length() > maxlen) { int part = maxlen - 3; return QLatin1String("...") + str.rightRef(part); } else { return str; } } QString KStringHandler::csqueeze(const QString &str, int maxlen) { if (str.length() > maxlen && maxlen > 3) { const int part = (maxlen - 3) / 2; return str.leftRef(part) + QLatin1String("...") + str.rightRef(part); } else { return str; } } QString KStringHandler::rsqueeze(const QString &str, int maxlen) { if (str.length() > maxlen) { int part = maxlen - 3; return str.leftRef(part) + QLatin1String("..."); } else { return str; } } QStringList KStringHandler::perlSplit(const QString &sep, const QString &s, int max) { bool ignoreMax = 0 == max; QStringList l; int searchStart = 0; int tokenStart = s.indexOf(sep, searchStart); while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) { if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) { l << s.mid(searchStart, tokenStart - searchStart); } searchStart = tokenStart + sep.length(); tokenStart = s.indexOf(sep, searchStart); } if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) { l << s.mid(searchStart, s.length() - searchStart); } return l; } QStringList KStringHandler::perlSplit(const QChar &sep, const QString &s, int max) { bool ignoreMax = 0 == max; QStringList l; int searchStart = 0; int tokenStart = s.indexOf(sep, searchStart); while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) { if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) { l << s.mid(searchStart, tokenStart - searchStart); } searchStart = tokenStart + 1; tokenStart = s.indexOf(sep, searchStart); } if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) { l << s.mid(searchStart, s.length() - searchStart); } return l; } +#if KCOREADDONS_BUILD_DEPRECATED_SINCE(5, 67) QStringList KStringHandler::perlSplit(const QRegExp &sep, const QString &s, int max) { + // nothing to split + if (s.isEmpty()) { + return QStringList(); + } + bool ignoreMax = 0 == max; QStringList l; int searchStart = 0; int tokenStart = sep.indexIn(s, searchStart); int len = sep.matchedLength(); while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) { if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) { l << s.mid(searchStart, tokenStart - searchStart); } searchStart = tokenStart + len; tokenStart = sep.indexIn(s, searchStart); len = sep.matchedLength(); } if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) { l << s.mid(searchStart, s.length() - searchStart); } return l; } +#endif + +QStringList KStringHandler::perlSplit(const QRegularExpression &sep, const QString &s, int max) +{ + // nothing to split + if (s.isEmpty()) { + return QStringList(); + } + + bool ignoreMax = max == 0; + + QStringList list; + + int start = 0; + QRegularExpressionMatchIterator iter = sep.globalMatch(s); + QRegularExpressionMatch match; + QString chunk; + while (iter.hasNext() && (ignoreMax || list.count() < max - 1)) { + match = iter.next(); + chunk = s.mid(start, match.capturedStart() - start); + if (!chunk.isEmpty()) { + list.append(chunk); + } + start = match.capturedEnd(); + } + + // catch the remainder + chunk = s.mid(start, s.size() - start); + if (!chunk.isEmpty()) { + list.append(chunk); + } + + return list; +} QString KStringHandler::tagUrls(const QString &text) { QString richText(text); const QRegularExpression urlEx(QStringLiteral("(www\\.(?!\\.)|(fish|ftp|http|https)://[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$\\(\\)]+)")); // the reference \1 is going to be replaced by the matched url const QLatin1String regexBackRef(QLatin1String("\\1")); const QString anchor = QLatin1String("") + regexBackRef + QLatin1String(""); richText.replace(urlEx, anchor); return richText; } QString KStringHandler::obscure(const QString &str) { QString result; const QChar *unicode = str.unicode(); for (int i = 0; i < str.length(); ++i) // yes, no typo. can't encode ' ' or '!' because // they're the unicode BOM. stupid scrambling. stupid. result += (unicode[ i ].unicode() <= 0x21) ? unicode[ i ] : QChar(0x1001F - unicode[ i ].unicode()); return result; } bool KStringHandler::isUtf8(const char *buf) { int i, n; unsigned char c; bool gotone = false; if (!buf) { return true; // whatever, just don't crash } #define F 0 /* character never appears in text */ #define T 1 /* character appears in plain ASCII text */ #define I 2 /* character appears in ISO-8859 text */ #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ static const unsigned char text_chars[256] = { /* BEL BS HT LF FF CR */ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ /* ESC */ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ /* NEL */ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ }; /* *ulen = 0; */ for (i = 0; (c = buf[i]); ++i) { if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ /* * Even if the whole file is valid UTF-8 sequences, * still reject it if it uses weird control characters. */ if (text_chars[c] != T) { return false; } } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */ return false; } else { /* 11xxxxxx begins UTF-8 */ int following; if ((c & 0x20) == 0) { /* 110xxxxx */ following = 1; } else if ((c & 0x10) == 0) { /* 1110xxxx */ following = 2; } else if ((c & 0x08) == 0) { /* 11110xxx */ following = 3; } else if ((c & 0x04) == 0) { /* 111110xx */ following = 4; } else if ((c & 0x02) == 0) { /* 1111110x */ following = 5; } else { return false; } for (n = 0; n < following; ++n) { i++; if (!(c = buf[i])) { goto done; } if ((c & 0x80) == 0 || (c & 0x40)) { return false; } } gotone = true; } } done: return gotone; /* don't claim it's UTF-8 if it's all 7-bit */ } #undef F #undef T #undef I #undef X QString KStringHandler::from8Bit(const char *str) { if (!str) { return QString(); } if (!*str) { static const QLatin1String emptyString(""); return emptyString; } return KStringHandler::isUtf8(str) ? QString::fromUtf8(str) : QString::fromLocal8Bit(str); } QString KStringHandler::preProcessWrap(const QString &text) { const QChar zwsp(0x200b); QString result; result.reserve(text.length()); for (int i = 0; i < text.length(); i++) { const QChar c = text[i]; bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('[')); bool singleQuote = (c == QLatin1Char('\'')); bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']')); bool breakAfter = (closingParens || c.isPunct() || c.isSymbol()); bool nextIsSpace = (i == (text.length() - 1) || text[i + 1].isSpace()); bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp); // Provide a breaking opportunity before opening parenthesis if (openingParens && !prevIsSpace) { result += zwsp; } // Provide a word joiner before the single quote if (singleQuote && !prevIsSpace) { result += QChar(0x2060); } result += c; if (breakAfter && !openingParens && !nextIsSpace && !singleQuote) { result += zwsp; } } return result; } int KStringHandler::logicalLength(const QString& text) { int length = 0; auto chrs = text.toUcs4(); for (auto chr : chrs) { auto script = QChar::script(chr); if (script == QChar::Script_Han || script == QChar::Script_Hangul || script == QChar::Script_Hiragana || script == QChar::Script_Katakana || script == QChar::Script_Yi || QChar::isHighSurrogate(chr)) { length += 2; } else { length += 1; } } return length; } diff --git a/src/lib/text/kstringhandler.h b/src/lib/text/kstringhandler.h index b428ac6..6647f40 100644 --- a/src/lib/text/kstringhandler.h +++ b/src/lib/text/kstringhandler.h @@ -1,229 +1,258 @@ /* This file is part of the KDE libraries Copyright (C) 1999 Ian Zepp (icszepp@islc.net) Copyright (C) 2000 Rik Hemsley (rikkus) Copyright (C) 2006 by Dominic Battre Copyright (C) 2006 by Martin Pool This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License version 2 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef KSTRINGHANDLER_H #define KSTRINGHANDLER_H #include #include class QChar; class QRegExp; +class QRegularExpression; class QString; class QStringList; /** * This namespace contains utility functions for handling strings. * * The functions here are intended to provide an easy way to * cut/slice/splice words inside sentences in whatever order desired. * While the main focus of KStringHandler is words (ie characters * separated by spaces/tabs), the two core functions here (split() * and join()) will allow you to use any character as a separator * This will make it easy to redefine what a 'word' means in the * future if needed. * * The function names and calling styles are based on python and mIRC's * scripting support. * * The ranges are a fairly powerful way of getting/stripping words from * a string. These ranges function, for the large part, as they would in * python. See the word(const QString&, int) and remword(const QString&, int) * functions for more detail. * * The methods here are completely stateless. All strings are cut * on the fly and returned as new qstrings/qstringlists. * * @short Namespace for manipulating words and sentences in strings * @author Ian Zepp * @see KShell */ namespace KStringHandler { /** Capitalizes each word in the string * "hello there" becomes "Hello There" (string) * @param text the text to capitalize * @return the resulting string */ KCOREADDONS_EXPORT QString capwords(const QString &text); /** Capitalizes each word in the list * [hello, there] becomes [Hello, There] (list) * @param list the list to capitalize * @return the resulting list */ KCOREADDONS_EXPORT QStringList capwords(const QStringList &list); /** Substitute characters at the beginning of a string by "...". * @param str is the string to modify * @param maxlen is the maximum length the modified string will have * If the original string is shorter than "maxlen", it is returned verbatim * @return the modified string */ KCOREADDONS_EXPORT QString lsqueeze(const QString &str, int maxlen = 40); /** Substitute characters at the middle of a string by "...". * @param str is the string to modify * @param maxlen is the maximum length the modified string will have * If the original string is shorter than "maxlen", it is returned verbatim * @return the modified string */ KCOREADDONS_EXPORT QString csqueeze(const QString &str, int maxlen = 40); /** Substitute characters at the end of a string by "...". * @param str is the string to modify * @param maxlen is the maximum length the modified string will have * If the original string is shorter than "maxlen", it is returned verbatim * @return the modified string */ KCOREADDONS_EXPORT QString rsqueeze(const QString &str, int maxlen = 40); /** * Split a QString into a QStringList in a similar fashion to the static * QStringList function in Qt, except you can specify a maximum number * of tokens. If max is specified (!= 0) then only that number of tokens * will be extracted. The final token will be the remainder of the string. * * Example: * \code * perlSplit("__", "some__string__for__you__here", 4) * QStringList contains: "some", "string", "for", "you__here" * \endcode * * @param sep is the string to use to delimit s. * @param s is the input string * @param max is the maximum number of extractions to perform, or 0. * @return A QStringList containing tokens extracted from s. */ KCOREADDONS_EXPORT QStringList perlSplit(const QString &sep, const QString &s, int max = 0); /** * Split a QString into a QStringList in a similar fashion to the static * QStringList function in Qt, except you can specify a maximum number * of tokens. If max is specified (!= 0) then only that number of tokens * will be extracted. The final token will be the remainder of the string. * * Example: * \code * perlSplit(' ', "kparts reaches the parts other parts can't", 3) * QStringList contains: "kparts", "reaches", "the parts other parts can't" * \endcode * * @param sep is the character to use to delimit s. * @param s is the input string * @param max is the maximum number of extractions to perform, or 0. * @return A QStringList containing tokens extracted from s. */ KCOREADDONS_EXPORT QStringList perlSplit(const QChar &sep, const QString &s, int max = 0); +#if KCOREADDONS_ENABLE_DEPRECATED_SINCE(5, 67) /** * Split a QString into a QStringList in a similar fashion to the static * QStringList function in Qt, except you can specify a maximum number * of tokens. If max is specified (!= 0) then only that number of tokens * will be extracted. The final token will be the remainder of the string. * * Example: * \code * perlSplit(QRegExp("[! ]"), "Split me up ! I'm bored ! OK ?", 3) * QStringList contains: "Split", "me", "up ! I'm bored ! OK ?" * \endcode * * @param sep is the regular expression to use to delimit s. * @param s is the input string * @param max is the maximum number of extractions to perform, or 0. * @return A QStringList containing tokens extracted from s. + * + * @deprecated Since 5.67, use perlSplit(const QRegularExpression &sep, + * const QString &s, int max = 0) instead. */ +KCOREADDONS_DEPRECATED_VERSION(5, 67, "Use KStringHandler::perlSplit(const QRegularExpression &, const QString &, int)") KCOREADDONS_EXPORT QStringList perlSplit(const QRegExp &sep, const QString &s, int max = 0); +#endif + +/** + * Split a QString into a QStringList in a similar fashion to the static + * QStringList function in Qt, except you can specify a maximum number + * of tokens. If max is specified (!= 0) then only that number of tokens + * will be extracted. The final token will be the remainder of the string. + * + * Example: + * \code + * perlSplit(QRegularExpression("[! ]"), "Split me up ! I'm bored ! OK ?", 3) + * QStringList contains: "Split", "me", "up ! I'm bored ! OK ?" + * \endcode + * + * @param sep is the regular expression to use to delimit s. + * @param s is the input string + * @param max is the maximum number of extractions to perform, or 0. + * @return A QStringList containing tokens extracted from s. + * + * @since 5.67 + */ +KCOREADDONS_EXPORT QStringList perlSplit(const QRegularExpression &sep, + const QString &s, int max = 0); /** * This method auto-detects URLs in strings, and adds HTML markup to them * so that richtext or HTML-enabled widgets will display the URL correctly. * @param text the string which may contain URLs * @return the resulting text */ KCOREADDONS_EXPORT QString tagUrls(const QString &text); /** Obscure string by using a simple symmetric encryption. Applying the function to a string obscured by this function will result in the original string. The function can be used to obscure passwords stored to configuration files. Note that this won't give you any more security than preventing that the password is directly copied and pasted. @param str string to be obscured @return obscured string */ KCOREADDONS_EXPORT QString obscure(const QString &str); /** Guess whether a string is UTF8 encoded. @param str the string to check @return true if UTF8. If false, the string is probably in Local8Bit. */ KCOREADDONS_EXPORT bool isUtf8(const char *str); /** Construct QString from a c string, guessing whether it is UTF8- or Local8Bit-encoded. @param str the input string @return the (hopefully correctly guessed) QString representation of @p str @see KEncodingProber */ KCOREADDONS_EXPORT QString from8Bit(const char *str); /** Preprocesses the given string in order to provide additional line breaking opportunities for QTextLayout. This is done by inserting ZWSP (Zero-width space) characters in the string at points that wouldn't normally be considered word boundaries by QTextLayout, but where wrapping the text will produce good results. Examples of such points includes after punctuation signs, underscores and dashes, that aren't followed by spaces. @since 4.4 */ KCOREADDONS_EXPORT QString preProcessWrap(const QString &text); /** Returns the length that reflects the density of information in the text. In general the character from CJK languages are assigned with weight 2, while other Latin characters are assigned with 1. @since 5.41 */ KCOREADDONS_EXPORT int logicalLength(const QString &text); } #endif