diff --git a/autotests/kstringhandlertest.h b/autotests/kstringhandlertest.h
--- a/autotests/kstringhandlertest.h
+++ b/autotests/kstringhandlertest.h
@@ -14,6 +14,8 @@
     void obscure();
     void preProcessWrap_data();
     void preProcessWrap();
+    void logicalLength_data();
+    void logicalLength();
 
 private:
     static QString test;
diff --git a/autotests/kstringhandlertest.cpp b/autotests/kstringhandlertest.cpp
--- a/autotests/kstringhandlertest.cpp
+++ b/autotests/kstringhandlertest.cpp
@@ -153,3 +153,24 @@
              replaceZwsp(expected));
 }
 
+void KStringHandlerTest::logicalLength_data()
+{
+    QTest::addColumn<QString>("string");
+    QTest::addColumn<int>("expected");
+
+    QTest::newRow("Latin") << "foo bar baz" << 11;
+    QTest::newRow("Chinese") << QString::fromUtf8("\xe4\xbd\xa0\xe5\xa5\xbd") << 4;
+    QTest::newRow("Japanese") << QString::fromUtf8("\xe9\x9d\x92\xe3\x81\x84\xe7\xa9\xba") << 6;
+    QTest::newRow("Korean") << QString::fromUtf8("\xed\x95\x9c\xea\xb5\xad\xec\x96\xb4") << 6;
+    QTest::newRow("Mixed") << QString::fromUtf8("KDE\xe6\xa1\x8c\xe9\x9d\xa2") << 7;
+    // U+1F600 lies outside the BMP: a single code point that must weigh 2.
+    QTest::newRow("Emoji") << QString::fromUtf8("\xf0\x9f\x98\x80") << 2;
+}
+
+void KStringHandlerTest::logicalLength()
+{
+    QFETCH(QString, string);
+    QFETCH(int, expected);
+    QCOMPARE(KStringHandler::logicalLength(string), expected);
+}
+
diff --git a/src/lib/text/kstringhandler.h b/src/lib/text/kstringhandler.h
--- a/src/lib/text/kstringhandler.h
+++ b/src/lib/text/kstringhandler.h
@@ -215,5 +215,19 @@
   @since 4.4
 */
 KCOREADDONS_EXPORT QString preProcessWrap(const QString &text);
+
+/**
+  Returns the length of @p text weighted by the density of information it
+  carries. Characters of the Han, Hangul, Hiragana, Katakana and Yi scripts,
+  and characters outside the Basic Multilingual Plane, count as 2; every
+  other character counts as 1.
+
+  @param text the string to measure
+  @return the weighted length of @p text
+
+  @since 5.41
+*/
+KCOREADDONS_EXPORT int logicalLength(const QString &text);
+
 }
 #endif
diff --git a/src/lib/text/kstringhandler.cpp b/src/lib/text/kstringhandler.cpp
--- a/src/lib/text/kstringhandler.cpp
+++ b/src/lib/text/kstringhandler.cpp
@@ -26,6 +26,7 @@
 #include // for the word ranges
 #include
 #include
+#include
 
 //
 // Capitalization routines
@@ -339,3 +340,25 @@
     return result;
 }
 
+int KStringHandler::logicalLength(const QString &text)
+{
+    int length = 0;
+    // Work on whole code points: toUcs4() has already merged surrogate pairs.
+    const auto chrs = text.toUcs4();
+    for (const auto chr : chrs) {
+        const auto script = QChar::script(chr);
+        // Dense scripts, and any code point beyond the BMP (i.e. one that
+        // needs a surrogate pair in UTF-16), carry double weight.
+        if (script == QChar::Script_Han ||
+            script == QChar::Script_Hangul ||
+            script == QChar::Script_Hiragana ||
+            script == QChar::Script_Katakana ||
+            script == QChar::Script_Yi ||
+            QChar::requiresSurrogates(chr)) {
+            length += 2;
+        } else {
+            length += 1;
+        }
+    }
+    return length;
+}