diff --git a/src/ioslaves/file/CMakeLists.txt b/src/ioslaves/file/CMakeLists.txt --- a/src/ioslaves/file/CMakeLists.txt +++ b/src/ioslaves/file/CMakeLists.txt @@ -8,7 +8,7 @@ if(WIN32) set(kio_file_PART_SRCS file.cpp file_win.cpp ) else() - set(kio_file_PART_SRCS file.cpp file_unix.cpp fdreceiver.cpp ) + set(kio_file_PART_SRCS file.cpp file_unix.cpp fdreceiver.cpp legacycodec.cpp) endif() find_package(ACL) diff --git a/src/ioslaves/file/file.cpp b/src/ioslaves/file/file.cpp --- a/src/ioslaves/file/file.cpp +++ b/src/ioslaves/file/file.cpp @@ -33,6 +33,10 @@ #include "kioglobal_p.h" +#ifdef Q_OS_UNIX +#include "legacycodec.h" +#endif + #include #include #ifdef Q_OS_WIN @@ -102,6 +106,10 @@ exit(-1); } +#ifdef Q_OS_UNIX + LegacyCodec codec; +#endif + FileProtocol slave(argv[2], argv[3]); // Make sure the first kDebug is after the slave ctor (which sets a SIGPIPE handler) diff --git a/src/ioslaves/file/legacycodec.h b/src/ioslaves/file/legacycodec.h new file mode 100644 --- /dev/null +++ b/src/ioslaves/file/legacycodec.h @@ -0,0 +1,66 @@ +/* + Copyright (c) 2019 Christoph Feck + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. 
+*/ + +#ifndef LEGACY_CODEC_H +#define LEGACY_CODEC_H 1 + +#include + +class LegacyCodec : public QTextCodec +{ +public: + LegacyCodec() { + if (codecForLocale()->mibEnum() == 106) { + setCodecForLocale(this); + } + } + + ~LegacyCodec() override { + setCodecForLocale(nullptr); + }; + + QList aliases() const override { + return QList(); + } + + int mibEnum() const override { + return 106; + } + + QByteArray name() const override { + return QByteArray("UTF-8"); + } + +protected: + QByteArray convertFromUnicode(const QChar *input, int number, QTextCodec::ConverterState *state) const override { + Q_UNUSED(state); + return encodeFileNameUTF8(QString::fromRawData(input, number)); + } + + QString convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state) const override { + Q_UNUSED(state); + return decodeFileNameUTF8(QByteArray::fromRawData(chars, len)); + } + +private: + static QByteArray encodeFileNameUTF8(const QString &fileName); + static QString decodeFileNameUTF8(const QByteArray &localFileName); +}; + +#endif // define LEGACY_CODEC_H diff --git a/src/ioslaves/file/legacycodec.cpp b/src/ioslaves/file/legacycodec.cpp new file mode 100644 --- /dev/null +++ b/src/ioslaves/file/legacycodec.cpp @@ -0,0 +1,175 @@ +/* + Copyright (c) 2013 Szókovács Róbert + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. 
If not, write to
+    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+    Boston, MA 02110-1301, USA.
+*/
+
+#include "legacycodec.h"
+
+namespace {
+
+// Bytes 0x80..0xFF that cannot be decoded as UTF-8 are represented by the
+// code points U+10FE00..U+10FE7F (one code point per byte value), so that
+// arbitrary byte strings survive a decode/encode round trip.
+const uint EscapedByteFirst = 0x10FE00;
+const uint EscapedByteLast = 0x10FE7F;
+
+/**
+ * Strictly decodes @p len bytes of UTF-8 into @p result.
+ *
+ * Returns false (leaving @p result unspecified) on any malformed input:
+ * stray or missing continuation bytes, invalid lead bytes, overlong forms,
+ * encoded surrogates, non-characters, values above U+10FFFF, and code points
+ * from the escape range U+10FE00..U+10FE7F.
+ */
+bool decodeStrictUtf8(const char *chars, int len, QString &result)
+{
+    // Well-formed UTF-8 never produces more UTF-16 code units than it has bytes.
+    result.resize(len);
+    QChar *out = result.data();
+
+    int need = 0;     // continuation bytes still expected
+    uint uc = 0;      // code point being assembled
+    uint min_uc = 0;  // smallest value allowed for the current sequence length
+
+    for (int i = 0; i < len; ++i) {
+        const uchar ch = uchar(chars[i]);
+
+        if (need) {
+            if ((ch & 0xc0) != 0x80) {
+                return false; // expected a continuation byte
+            }
+            uc = (uc << 6) | (ch & 0x3f);
+            if (--need) {
+                continue;
+            }
+            if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000
+                    || QChar::isNonCharacter(uc)) {
+                return false; // overlong, surrogate, out of range or non-character
+            }
+            if (uc >= EscapedByteFirst && uc <= EscapedByteLast) {
+                // Accepting these would be ambiguous: the encoder turns them
+                // back into a single raw byte, not into the bytes seen here.
+                return false;
+            }
+            if (uc > 0xffff) {
+                *out++ = QChar(QChar::highSurrogate(uc));
+                *out++ = QChar(QChar::lowSurrogate(uc));
+            } else {
+                *out++ = QChar(ushort(uc));
+            }
+        } else if (ch < 128) {
+            *out++ = QChar(ushort(ch));
+        } else if ((ch & 0xe0) == 0xc0) {
+            uc = ch & 0x1f;
+            need = 1;
+            min_uc = 0x80;
+        } else if ((ch & 0xf0) == 0xe0) {
+            uc = ch & 0x0f;
+            need = 2;
+            min_uc = 0x800;
+        } else if ((ch & 0xf8) == 0xf0) {
+            uc = ch & 0x07;
+            need = 3;
+            min_uc = 0x10000;
+        } else {
+            return false; // not a valid lead byte
+        }
+    }
+
+    if (need) {
+        return false; // input ended in the middle of a sequence
+    }
+
+    result.truncate(int(out - result.constData()));
+    return true;
+}
+
+/**
+ * Fallback for byte strings that are not valid UTF-8: ASCII bytes map to
+ * themselves, every other byte maps to its escape code point.
+ */
+QString escapeHighBytes(const char *chars, int len)
+{
+    // Each escaped byte needs a surrogate pair, i.e. two UTF-16 code units.
+    QString result(2 * len, Qt::Uninitialized);
+    QChar *out = result.data();
+
+    for (int i = 0; i < len; ++i) {
+        const uchar ch = uchar(chars[i]);
+        if (ch < 128) {
+            *out++ = QChar(ushort(ch));
+        } else {
+            const uint uc = EscapedByteFirst + (ch - 128);
+            *out++ = QChar(QChar::highSurrogate(uc));
+            *out++ = QChar(QChar::lowSurrogate(uc));
+        }
+    }
+
+    result.truncate(int(out - result.constData()));
+    return result;
+}
+
+} // namespace
+
+/**
+ * Encodes @p fileName as UTF-8, with two deviations from plain UTF-8:
+ *  - code points U+10FE00..U+10FE7F are written as the single raw byte
+ *    0x80..0xFF they stand for;
+ *  - unpaired surrogates and Unicode non-characters are written as '?'.
+ */
+QByteArray LegacyCodec::encodeFileNameUTF8(const QString &fileName)
+{
+    const int len = fileName.length();
+    const QChar *ch = fileName.constData();
+    const QChar *const end = ch + len;
+
+    const uchar replacement = '?';
+
+    // Upper bound: at most three bytes per UTF-16 code unit (a surrogate
+    // pair is two units and needs at most four bytes).
+    QByteArray rstr;
+    rstr.resize(3 * len);
+    uchar *cursor = reinterpret_cast<uchar *>(rstr.data());
+
+    while (ch < end) {
+        uint u = ch->unicode();
+
+        if (ch->isHighSurrogate()) {
+            if (ch + 1 < end && ch[1].isLowSurrogate()) {
+                u = QChar::surrogateToUcs4(ch[0], ch[1]);
+                ++ch; // skip the high half; the low half is skipped below
+            } else {
+                // High surrogate without low: replace it, and let the next
+                // iteration handle whatever follows.
+                *cursor++ = replacement;
+                ++ch;
+                continue;
+            }
+        } else if (ch->isLowSurrogate()) {
+            // Low surrogate without high.
+            *cursor++ = replacement;
+            ++ch;
+            continue;
+        }
+
+        if (u >= EscapedByteFirst && u <= EscapedByteLast) {
+            // Escaped raw byte: restore the original byte value.
+            *cursor++ = uchar(u - EscapedByteFirst + 128);
+        } else if (u < 0x80) {
+            *cursor++ = uchar(u);
+        } else if (u < 0x0800) {
+            *cursor++ = 0xc0 | uchar(u >> 6);
+            *cursor++ = 0x80 | uchar(u & 0x3f);
+        } else if (QChar::isNonCharacter(u)) {
+            *cursor++ = replacement;
+        } else {
+            if (u > 0xffff) {
+                *cursor++ = 0xf0 | uchar(u >> 18);
+                *cursor++ = 0x80 | (uchar(u >> 12) & 0x3f);
+            } else {
+                *cursor++ = 0xe0 | (uchar(u >> 12) & 0x3f);
+            }
+            *cursor++ = 0x80 | (uchar(u >> 6) & 0x3f);
+            *cursor++ = 0x80 | uchar(u & 0x3f);
+        }
+        ++ch;
+    }
+
+    rstr.resize(int(cursor - reinterpret_cast<const uchar *>(rstr.constData())));
+    return rstr;
+}
+
+/**
+ * Decodes a local file name to a QString.
+ *
+ * Input that is valid UTF-8 is decoded normally. If any part of the input is
+ * not valid, the whole name is decoded byte by byte instead: ASCII bytes
+ * map to themselves and each byte 0x80..0xFF maps to U+10FE00..U+10FE7F, which
+ * encodeFileNameUTF8() converts back to the same byte.
+ *
+ * Decoding stops at the first NUL byte, if there is one within the array.
+ */
+QString LegacyCodec::decodeFileNameUTF8(const QByteArray &localFileName)
+{
+    const char *chars = localFileName.constData();
+    // The array may wrap raw data without a terminating NUL, so the scan for
+    // a terminator must not run past size().
+    const int len = int(qstrnlen(chars, uint(localFileName.size())));
+
+    QString result;
+    if (decodeStrictUtf8(chars, len, result)) {
+        return result;
+    }
+    return escapeHighBytes(chars, len);
+}