Changeset View
Changeset View
Standalone View
Standalone View
tools/uni2characterwidth/uni2characterwidth.cpp
- This file was added.
1 | /* | ||||
---|---|---|---|---|---|
2 | This file is part of Konsole, a terminal emulator for KDE. | ||||
3 | | ||||
4 | Copyright 2018 by Mariusz Glebocki <mglb@arccos-1.net> | ||||
5 | | ||||
6 | This program is free software; you can redistribute it and/or modify | ||||
7 | it under the terms of the GNU General Public License as published by | ||||
8 | the Free Software Foundation; either version 2 of the License, or | ||||
9 | (at your option) any later version. | ||||
10 | | ||||
11 | This program is distributed in the hope that it will be useful, | ||||
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
14 | GNU General Public License for more details. | ||||
15 | | ||||
16 | You should have received a copy of the GNU General Public License | ||||
17 | along with this program; if not, write to the Free Software | ||||
18 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||||
19 | 02110-1301 USA. | ||||
20 | */ | ||||
21 | | ||||
22 | #include <QCommandLineParser> | ||||
23 | #include <QCoreApplication> | ||||
24 | #include <QEventLoop> | ||||
25 | #include <QFile> | ||||
26 | #include <QFileInfo> | ||||
27 | #include <QLoggingCategory> | ||||
28 | #include <QMap> | ||||
29 | #include <QRegularExpression> | ||||
30 | #include <QRegularExpressionMatch> | ||||
31 | #include <QString> | ||||
32 | #include <QStringBuilder> | ||||
33 | #include <QTextStream> | ||||
34 | #include <QTimer> | ||||
35 | #include "template.h" | ||||
36 | | ||||
37 | #include <KIO/Job> | ||||
38 | | ||||
39 | // Backward compatibility | ||||
40 | #if QT_VERSION < QT_VERSION_CHECK(5, 7, 0) | ||||
41 | #define qAsConst(code) code | ||||
42 | #endif | ||||
43 | | ||||
44 | | ||||
45 | | ||||
// Number of code points handled by the tool (0x000000..0x10FFFF).
static constexpr unsigned int CODE_POINTS_NUM{0x110000};
// Highest handled code point.
static constexpr unsigned int LAST_CODE_POINT{CODE_POINTS_NUM - 1};
48 | | ||||
49 | struct UcdEntry { | ||||
50 | struct { uint first; uint last; } cp; | ||||
51 | QStringList fields; | ||||
52 | }; | ||||
53 | | ||||
54 | class UcdParserBase { | ||||
55 | public: | ||||
56 | ~UcdParserBase() { | ||||
57 | _source->close(); | ||||
58 | } | ||||
59 | | ||||
60 | bool hasNext() { | ||||
61 | bool hadNext = _hasNext; | ||||
62 | if(!_nextFetched) { | ||||
63 | _hasNext = fetchNext(); | ||||
64 | _nextFetched = true; | ||||
65 | } | ||||
66 | return hadNext; | ||||
67 | } | ||||
68 | | ||||
69 | protected: | ||||
70 | UcdParserBase(QIODevice *source, UcdEntry *entry) | ||||
71 | : _source(source) | ||||
72 | , _nextFetched(false) | ||||
73 | , _hasNext(true) | ||||
74 | , _lineNo(0) | ||||
75 | , _entry(entry) | ||||
76 | { | ||||
77 | Q_ASSERT(_source); | ||||
78 | Q_ASSERT(_entry); | ||||
79 | } | ||||
80 | | ||||
81 | bool fetchNext() { | ||||
82 | Q_ASSERT(_source->isOpen()); | ||||
83 | if(!_source->isOpen()) | ||||
84 | return false; | ||||
85 | | ||||
86 | static const QRegularExpression ENTRY_RE = QRegularExpression(QStringLiteral( | ||||
87 | // Match 1: "cp1" - first CP / "cp2" (optional) - last CP | ||||
88 | R"#((?:^(?<cp1>[[:xdigit:]]+)(?:\.\.(?<cp2>[[:xdigit:]]+))?[ \t]*;)#" | ||||
89 | // Match 1: "field0" - first data field" | ||||
90 | // "udRangeInd" (UnicodeData.txt only) - if present, the line is either first or last line of a range | ||||
91 | R"#([ \t]*(?<field0>[^#;\n]*?(?:, (?<udRangeInd>First|Last)>)?)[ \t]*(?:;|(?:\#.*)?$))|)#" | ||||
92 | // Match 2..n: "field" - n-th field | ||||
93 | R"#((?:\G(?<=;)[ \t]*(?<field>[^#;\n]*?)[ \t]*(?:;|(?:#.*)?$)))#"), | ||||
94 | QRegularExpression::OptimizeOnFirstUsageOption | ||||
95 | ); | ||||
96 | static const QRegularExpression UD_RANGE_IND_RE(QStringLiteral(", (First|Last)")); | ||||
97 | static const QRegularExpression COMMENT_RE(QStringLiteral("^[ \t]*(#.*)?$")); | ||||
98 | | ||||
99 | QString line; | ||||
100 | bool ok; | ||||
101 | _entry->fields.clear(); | ||||
102 | while(!_source->atEnd()) { | ||||
103 | line = QString::fromUtf8(_source->readLine()); | ||||
104 | _lineNo++; | ||||
105 | auto mit = ENTRY_RE.globalMatch(line); | ||||
106 | if(!mit.hasNext()) { | ||||
107 | // Do not complain about comments and empty lines | ||||
108 | if(!COMMENT_RE.match(line).hasMatch()) | ||||
109 | qDebug() << QStringLiteral("Line %1: does not match - skipping").arg(_lineNo); | ||||
110 | continue; | ||||
111 | } | ||||
112 | | ||||
113 | auto match = mit.next(); | ||||
114 | _entry->cp.first = match.captured(QStringLiteral("cp1")).toUInt(&ok, 16); | ||||
115 | if(!ok) { | ||||
116 | qDebug() << QStringLiteral("Line %d Invalid cp1 - skipping").arg(_lineNo); | ||||
117 | continue; | ||||
118 | } | ||||
119 | _entry->cp.last = match.captured(QStringLiteral("cp2")).toUInt(&ok, 16); | ||||
120 | if(!ok) { | ||||
121 | _entry->cp.last = _entry->cp.first; | ||||
122 | } | ||||
123 | QString field0 = match.captured(QStringLiteral("field0")); | ||||
124 | if(field0.isNull()) { | ||||
125 | qDebug() << QStringLiteral("Line %d: Missing field0 - skipping").arg(_lineNo); | ||||
126 | continue; | ||||
127 | } | ||||
128 | if(!match.captured(QStringLiteral("udRangeInd")).isNull()) { | ||||
129 | if(match.captured(QStringLiteral("udRangeInd")) == QStringLiteral("First")) { | ||||
130 | // Fetch next valid line, as it pairs with the current one to form a range | ||||
131 | QRegularExpressionMatch nlMatch; | ||||
132 | int firstLineNo = _lineNo; | ||||
133 | while(!_source->atEnd() && !nlMatch.hasMatch()) { | ||||
134 | line = QString::fromUtf8(_source->readLine()); | ||||
135 | _lineNo++; | ||||
136 | nlMatch = ENTRY_RE.match(line); | ||||
137 | if(!nlMatch.hasMatch()) { | ||||
138 | qDebug() << QStringLiteral("Line %d: does not match - skipping").arg(_lineNo); | ||||
139 | } | ||||
140 | } | ||||
141 | if(nlMatch.hasMatch()) { | ||||
142 | _entry->cp.last = nlMatch.captured(QStringLiteral("cp1")).toUInt(&ok, 16); | ||||
143 | if(!ok) { | ||||
144 | qDebug() << QStringLiteral("Line %1-%2: Missing or invalid second cp1 (\"Last\" entry) - skipping") | ||||
145 | .arg(firstLineNo).arg(_lineNo); | ||||
146 | continue; | ||||
147 | } | ||||
148 | } | ||||
149 | } | ||||
150 | field0.remove(UD_RANGE_IND_RE); | ||||
151 | } | ||||
152 | _entry->fields.append(field0); | ||||
153 | | ||||
154 | while(mit.hasNext()) { | ||||
155 | _entry->fields.append(mit.next().captured(QStringLiteral("field"))); | ||||
156 | } | ||||
157 | | ||||
158 | return !_source->atEnd(); | ||||
159 | } | ||||
160 | return false; | ||||
161 | } | ||||
162 | | ||||
163 | QIODevice *_source; | ||||
164 | bool _nextFetched; | ||||
165 | bool _hasNext; | ||||
166 | | ||||
167 | private: | ||||
168 | int _lineNo; | ||||
169 | UcdEntry *_entry; | ||||
170 | }; | ||||
171 | | ||||
172 | template <class EntryType> | ||||
173 | class UcdParser: public UcdParserBase { | ||||
174 | public: | ||||
175 | static_assert(std::is_base_of<UcdEntry, EntryType>::value, "'EntryType' has to be derived from UcdParser::Entry"); | ||||
176 | | ||||
177 | UcdParser(QIODevice *source): UcdParserBase(source, &_typedEntry) {} | ||||
178 | | ||||
179 | inline const EntryType & next() { | ||||
180 | if(!_nextFetched) | ||||
181 | fetchNext(); | ||||
182 | _nextFetched = false; | ||||
183 | return _typedEntry; | ||||
184 | } | ||||
185 | | ||||
186 | private: | ||||
187 | EntryType _typedEntry; | ||||
188 | }; | ||||
189 | | ||||
190 | class KIODevice: public QIODevice { | ||||
191 | public: | ||||
192 | enum Error { | ||||
193 | NoError, | ||||
194 | UnknownError, | ||||
195 | TimeoutError, | ||||
196 | UnknownHostError, | ||||
197 | MalformedUrlError, | ||||
198 | NotFoundError, | ||||
199 | }; | ||||
200 | | ||||
201 | KIODevice(const QUrl &url) | ||||
202 | : _url(url) | ||||
203 | , _job(nullptr) | ||||
204 | , _error(NoError) {} | ||||
205 | | ||||
206 | ~KIODevice() { | ||||
207 | close(); | ||||
208 | } | ||||
209 | | ||||
210 | bool open() { | ||||
211 | if(_job) | ||||
212 | return false; | ||||
213 | | ||||
214 | _job = KIO::storedGet(_url); | ||||
215 | QObject::connect(_job, &KIO::StoredTransferJob::result, | ||||
216 | _job, [&](KJob *) { | ||||
217 | if(_job->isErrorPage()) | ||||
218 | _eventLoop.exit(KIO::ERR_DOES_NOT_EXIST); | ||||
219 | else if(_job->error() != KJob::NoError) | ||||
220 | _eventLoop.exit(_job->error()); | ||||
221 | else | ||||
222 | _data = _job->data(); | ||||
223 | | ||||
224 | _eventLoop.exit(KJob::NoError); | ||||
225 | }); | ||||
226 | | ||||
227 | _eventLoop.exec(); | ||||
228 | switch(_job->error()) { | ||||
229 | case KJob::NoError: | ||||
230 | _error = NoError; | ||||
231 | setErrorString(QStringLiteral("")); | ||||
232 | QIODevice::open(QIODevice::ReadOnly | QIODevice::Unbuffered); | ||||
233 | break; | ||||
234 | case KJob::KilledJobError: _error = TimeoutError; break; | ||||
235 | case KIO::ERR_UNKNOWN_HOST: _error = UnknownHostError; break; | ||||
236 | case KIO::ERR_DOES_NOT_EXIST: _error = NotFoundError; break; | ||||
237 | case KIO::ERR_MALFORMED_URL: _error = MalformedUrlError; break; | ||||
238 | default: _error = UnknownError; break; | ||||
239 | } | ||||
240 | if(_error != NoError) { | ||||
241 | setErrorString(QStringLiteral("KIO: ") + _job->errorString()); | ||||
242 | delete _job; | ||||
243 | _job = nullptr; | ||||
244 | _data.clear(); | ||||
245 | } | ||||
246 | return _error == NoError; | ||||
247 | } | ||||
248 | bool open(OpenMode mode) override { | ||||
249 | Q_ASSERT(mode == QIODevice::ReadOnly); | ||||
250 | return open(); | ||||
251 | } | ||||
252 | void close() override { | ||||
253 | if(_job) { | ||||
254 | delete _job; | ||||
255 | _job = nullptr; | ||||
256 | _error = NoError; | ||||
257 | setErrorString(QStringLiteral("")); | ||||
258 | _data.clear(); | ||||
259 | QIODevice::close(); | ||||
260 | } | ||||
261 | } | ||||
262 | | ||||
263 | qint64 size() const override { | ||||
264 | return _data.size(); | ||||
265 | } | ||||
266 | | ||||
267 | int error() const { return _error; } | ||||
268 | void unsetError() { _error = NoError; } | ||||
269 | | ||||
270 | protected: | ||||
271 | qint64 writeData(const char *, qint64) override { return -1; } | ||||
272 | qint64 readData(char *data, qint64 maxSize) override { | ||||
273 | Q_UNUSED(maxSize); | ||||
274 | Q_ASSERT(_job); | ||||
275 | Q_ASSERT(_job->error() == NoError); | ||||
276 | Q_ASSERT(data != nullptr); | ||||
277 | if(maxSize == 0 || pos() >= _data.length()) { | ||||
278 | return 0; | ||||
279 | } else if(pos() < _data.length()) { | ||||
280 | qint64 bytesToCopy = qMin(maxSize, _data.length() - pos()); | ||||
281 | memcpy(data, _data.data() + pos(), bytesToCopy); | ||||
282 | return bytesToCopy; | ||||
283 | } else { | ||||
284 | return -1; | ||||
285 | } | ||||
286 | } | ||||
287 | | ||||
288 | private: | ||||
289 | QUrl _url; | ||||
290 | KIO::StoredTransferJob *_job; | ||||
291 | Error _error; | ||||
292 | QEventLoop _eventLoop; | ||||
293 | QByteArray _data; | ||||
294 | }; | ||||
295 | | ||||
296 | | ||||
297 | | ||||
298 | struct CategoryProperty { | ||||
299 | enum Flag: uint32_t { | ||||
300 | Invalid = 0, | ||||
301 | #define CATEGORY_PROPERTY_VALUE(val, sym, intVal) sym = intVal, | ||||
302 | #include "properties.h" | ||||
303 | }; | ||||
304 | enum Group: uint32_t { | ||||
305 | #define CATEGORY_PROPERTY_GROUP(val, sym, intVal) sym = intVal, | ||||
306 | #include "properties.h" | ||||
307 | }; | ||||
308 | | ||||
309 | CategoryProperty(uint32_t value = Unassigned): _value(value) {} | ||||
310 | CategoryProperty(const QString &string): _value(fromString(string)) {} | ||||
311 | operator uint32_t &() { return _value; } | ||||
312 | operator const uint32_t &() const { return _value; } | ||||
313 | bool isValid() const { return _value != Invalid; } | ||||
314 | | ||||
315 | private: | ||||
316 | static uint32_t fromString(const QString &string) { | ||||
317 | static const QMap<QString, uint32_t> map = { | ||||
318 | #define CATEGORY_PROPERTY_VALUE(val, sym, intVal) { QStringLiteral(#val), sym }, | ||||
319 | #include "properties.h" | ||||
320 | }; | ||||
321 | return map.contains(string) ? map[string] : uint8_t(Invalid); | ||||
322 | } | ||||
323 | uint32_t _value; | ||||
324 | }; | ||||
325 | | ||||
326 | struct EastAsianWidthProperty { | ||||
327 | enum Value: uint8_t { | ||||
328 | Invalid = 0x80, | ||||
329 | #define EAST_ASIAN_WIDTH_PROPERTY_VALUE(val, sym, intVal) sym = intVal, | ||||
330 | #include "properties.h" | ||||
331 | }; | ||||
332 | | ||||
333 | EastAsianWidthProperty(uint8_t value = Neutral): _value(value) {} | ||||
334 | EastAsianWidthProperty(const QString &string): _value(fromString(string)) {} | ||||
335 | operator uint8_t &() { return _value; } | ||||
336 | operator const uint8_t &() const { return _value; } | ||||
337 | bool isValid() const { return _value != Invalid; } | ||||
338 | | ||||
339 | private: | ||||
340 | static uint8_t fromString(const QString &string) { | ||||
341 | static const QMap<QString, Value> map = { | ||||
342 | #define EAST_ASIAN_WIDTH_PROPERTY_VALUE(val, sym, intVal) { QStringLiteral(#val), Value::sym }, | ||||
343 | #include "properties.h" | ||||
344 | }; | ||||
345 | return map.contains(string) ? map[string] : Invalid; | ||||
346 | } | ||||
347 | uint8_t _value; | ||||
348 | }; | ||||
349 | | ||||
350 | struct EmojiProperty { | ||||
351 | enum Flag: uint8_t { | ||||
352 | Invalid = 0x80, | ||||
353 | #define EMOJI_PROPERTY_VALUE(val, sym, intVal) sym = intVal, | ||||
354 | #include "properties.h" | ||||
355 | }; | ||||
356 | | ||||
357 | EmojiProperty(uint8_t value = None): _value(value) {} | ||||
358 | EmojiProperty(const QString &string): _value(fromString(string)) {} | ||||
359 | operator uint8_t &() { return _value; } | ||||
360 | operator const uint8_t &() const { return _value; } | ||||
361 | bool isValid() const { return !(_value & Invalid); } | ||||
362 | | ||||
363 | private: | ||||
364 | static uint8_t fromString(const QString &string) { | ||||
365 | static const QMap<QString, uint8_t> map = { | ||||
366 | #define EMOJI_PROPERTY_VALUE(val, sym, intVal) { QStringLiteral(#val), sym }, | ||||
367 | #include "properties.h" | ||||
368 | }; | ||||
369 | return map.contains(string) ? map[string] : uint8_t(Invalid); | ||||
370 | } | ||||
371 | uint8_t _value; | ||||
372 | }; | ||||
373 | | ||||
374 | | ||||
375 | | ||||
376 | struct CharacterWidth { | ||||
377 | enum Width: int8_t { | ||||
378 | Invalid = SCHAR_MIN, | ||||
379 | _VALID_START = -3, | ||||
380 | Ambiguous = -2, | ||||
381 | NonPrintable = -1, | ||||
382 | // 0 | ||||
383 | // 1 | ||||
384 | Unassigned = 1, | ||||
385 | // 2 | ||||
386 | _VALID_END = 3, | ||||
387 | }; | ||||
388 | | ||||
389 | CharacterWidth(const CharacterWidth &other): _width(other._width) {} | ||||
390 | CharacterWidth(int8_t width = Invalid): _width(width) {} | ||||
391 | CharacterWidth & operator =(const CharacterWidth &other) { _width = other._width; return *this; } | ||||
392 | int operator =(const int8_t width) { _width = width; return _width; } | ||||
393 | int width() const { return _width; } | ||||
394 | operator int() const { return width(); } | ||||
395 | | ||||
396 | const QString toString() const { | ||||
397 | switch(_width) { | ||||
398 | case Ambiguous: return QStringLiteral("Ambiguous"); | ||||
399 | case NonPrintable: return QStringLiteral("NonPrintable"); | ||||
400 | case 0: return QStringLiteral("0"); | ||||
401 | case 1: return QStringLiteral("1"); | ||||
402 | case 2: return QStringLiteral("2"); | ||||
403 | default: | ||||
404 | case Invalid: return QStringLiteral("Invalid"); | ||||
405 | } | ||||
406 | } | ||||
407 | | ||||
408 | bool isValid() const { return (_width > _VALID_START && _width < _VALID_END); }; | ||||
409 | | ||||
410 | private: | ||||
411 | int8_t _width; | ||||
412 | }; | ||||
413 | | ||||
414 | | ||||
415 | | ||||
416 | struct CharacterProperties { | ||||
417 | CategoryProperty category; | ||||
418 | EastAsianWidthProperty eastAsianWidth; | ||||
419 | EmojiProperty emoji; | ||||
420 | CharacterWidth customWidth; | ||||
421 | // For debug purposes in "details" output generator | ||||
422 | uint8_t widthFromPropsRule; | ||||
423 | }; | ||||
424 | | ||||
425 | | ||||
426 | | ||||
427 | struct UnicodeDataEntry: public UcdEntry { | ||||
428 | enum FieldId { | ||||
429 | NameId = 0, | ||||
430 | CategoryId = 1, | ||||
431 | }; | ||||
432 | CategoryProperty category() const { return CategoryProperty(this->fields.value(CategoryId)); } | ||||
433 | }; | ||||
434 | | ||||
435 | struct EastAsianWidthEntry: public UcdEntry { | ||||
436 | enum FieldId { | ||||
437 | WidthId = 0, | ||||
438 | }; | ||||
439 | EastAsianWidthProperty eastAsianWidth() const { return EastAsianWidthProperty(this->fields.value(WidthId)); } | ||||
440 | }; | ||||
441 | | ||||
442 | struct EmojiDataEntry: public UcdEntry { | ||||
443 | enum FieldId { | ||||
444 | EmojiId = 0, | ||||
445 | }; | ||||
446 | EmojiProperty emoji() const { return EmojiProperty(this->fields.value(EmojiId)); } | ||||
447 | }; | ||||
448 | | ||||
449 | struct GenericWidthEntry: public UcdEntry { | ||||
450 | enum FieldId { | ||||
451 | WidthId = 0, | ||||
452 | }; | ||||
453 | CharacterWidth width() const { | ||||
454 | bool ok; | ||||
455 | CharacterWidth w = this->fields.value(WidthId).toInt(&ok, 10); | ||||
456 | return (ok && w.isValid()) ? w : CharacterWidth::Invalid; | ||||
457 | } | ||||
458 | }; | ||||
459 | | ||||
460 | struct WidthsRange { | ||||
461 | struct { uint first; uint last; } cp; | ||||
462 | CharacterWidth width; | ||||
463 | }; | ||||
464 | | ||||
465 | QVector<WidthsRange> rangesFromWidths(const QVector<CharacterWidth> &widths, QPair<uint, uint> ucsRange = {0, CODE_POINTS_NUM}) { | ||||
466 | QVector<WidthsRange> ranges; | ||||
467 | | ||||
468 | if(ucsRange.second >= CODE_POINTS_NUM) | ||||
469 | ucsRange.second = widths.size() - 1; | ||||
470 | | ||||
471 | uint first = ucsRange.first; | ||||
472 | for(uint cp = first + 1; cp <= uint(ucsRange.second); ++cp) { | ||||
473 | if(widths[first] != widths[cp]) { | ||||
474 | ranges.append({{first, cp-1}, widths[cp-1]}); | ||||
475 | first = cp; | ||||
476 | } | ||||
477 | } | ||||
478 | ranges.append({{first, uint(ucsRange.second)}, widths[ucsRange.second]}); | ||||
479 | | ||||
480 | return ranges; | ||||
481 | } | ||||
482 | | ||||
483 | // Real ranges look like this (each continuous letter sequence is a range): | ||||
484 | // | ||||
485 | // D D D D D D D D 8 ranges | ||||
486 | // C C C C C C CC C CC 9 ranges | ||||
487 | // BBB BBB B B BBB BBBBBB 6 ranges | ||||
488 | // A A A A 4 ranges | ||||
489 | // ∑: 27 ranges | ||||
490 | // | ||||
491 | // To reduce total ranges count, the holes in groups can be filled with ranges | ||||
492 | // from groups above them: | ||||
493 | // | ||||
494 | // D D D D D D D D 8 ranges | ||||
495 | // CCC C CCCCC CCCCCCC 4 ranges | ||||
496 | // BBBBBBB BBBBBBB BBBBBBBBBBBBBBBB 3 ranges | ||||
497 | // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 1 ranges | ||||
498 | // ∑: 16 ranges | ||||
499 | // | ||||
500 | // First range is always without change. Last range (A) can be dropped | ||||
501 | // (it always contains everything). Search should be done in order: D, C, B (A). | ||||
502 | // For simplicity the function returns all ranges, including first and last. | ||||
503 | QMap<CharacterWidth, QVector<QPair<uint, uint>>> mergedRangesFromWidths(const QVector<CharacterWidth> &widths, const QVector<CharacterWidth> widthsSortOrder, | ||||
504 | QPair<uint, uint> ucsRange = {0, CODE_POINTS_NUM}) { | ||||
505 | if(ucsRange.second >= CODE_POINTS_NUM) | ||||
506 | ucsRange.second = widths.size() - 1; | ||||
507 | QVector<WidthsRange> ranges = rangesFromWidths(widths, ucsRange); | ||||
508 | QMap<CharacterWidth, QVector<QPair<uint, uint>>> mergedRanges; | ||||
509 | | ||||
510 | int cmwi; // Currently Merged Width Index | ||||
511 | int sri = -1; // Start Range Index (for current width) | ||||
512 | int cri; // Currrent Range Index | ||||
513 | | ||||
514 | // First width ranges are without change. Last one has one range spanning everything, so we can skip this | ||||
515 | for(cmwi = 1; cmwi < widthsSortOrder.size() - 1; ++cmwi) { | ||||
516 | const CharacterWidth &cmw = widthsSortOrder[cmwi]; // Currently Merged Width | ||||
517 | for(cri = 0; cri < ranges.size(); ++cri) { | ||||
518 | WidthsRange &cr = ranges[cri]; // Current Range | ||||
519 | if(cr.width == cmw) { | ||||
520 | // Range is suitable for merge | ||||
521 | if(sri < 0) { | ||||
522 | // First one, just remember it | ||||
523 | sri = cri; | ||||
524 | } else { | ||||
525 | // Merge | ||||
526 | ranges[sri].cp.last = cr.cp.last; | ||||
527 | cr.width = CharacterWidth::Invalid; | ||||
528 | } | ||||
529 | } else { | ||||
530 | // Current range has another width - can we continue merging? | ||||
531 | if(sri >= 0) { | ||||
532 | const int crwi = widthsSortOrder.indexOf(cr.width); // Current Range Width Index | ||||
533 | if(!(crwi < cmwi && crwi >= 0)) { | ||||
534 | // current range is not above currently merged width - stop merging | ||||
535 | sri = -1; | ||||
536 | } | ||||
537 | } | ||||
538 | } | ||||
539 | } | ||||
540 | } | ||||
541 | | ||||
542 | for(const auto &range: qAsConst(ranges)) { | ||||
543 | if(range.width.isValid() && range.width != widthsSortOrder.last()) | ||||
544 | mergedRanges[range.width].append({range.cp.first, range.cp.last}); | ||||
545 | } | ||||
546 | mergedRanges[widthsSortOrder.last()].append({ucsRange.first, ucsRange.second}); | ||||
547 | | ||||
548 | return mergedRanges; | ||||
549 | } | ||||
550 | | ||||
551 | namespace generators { | ||||
552 | | ||||
553 | using GeneratorFunc = bool (*)(QTextStream &, const QVector<CharacterProperties> &, | ||||
554 | const QVector<CharacterWidth> &, const QMap<QString, QString> &); | ||||
555 | | ||||
556 | bool code(QTextStream &out, const QVector<CharacterProperties> &props, const QVector<CharacterWidth> &widths, | ||||
557 | const QMap<QString, QString> &args) { | ||||
558 | static constexpr int DIRECT_LUT_SIZE = 256; | ||||
559 | | ||||
560 | Q_UNUSED(props); | ||||
561 | QTextStream eout(stderr, QIODevice::WriteOnly); | ||||
562 | | ||||
563 | if(args.value(QStringLiteral("param")).isEmpty()) { | ||||
564 | eout << QStringLiteral("Template file not specified.") << endl << endl; | ||||
565 | return false; | ||||
566 | } | ||||
567 | QFile templateFile(args.value(QStringLiteral("param"))); | ||||
568 | if(!templateFile.open(QIODevice::ReadOnly)) { | ||||
569 | eout << QStringLiteral("Could not open file ") << templateFile.fileName() << ": " << templateFile.errorString(); | ||||
570 | exit(1); | ||||
571 | } | ||||
572 | | ||||
573 | const QString templateText = QString::fromUtf8(templateFile.readAll()); | ||||
574 | templateFile.close(); | ||||
575 | | ||||
576 | Var::Map data = { | ||||
577 | {QStringLiteral("gen-file-warning"), QStringLiteral("THIS IS A GENERATED FILE. DO NOT EDIT.")}, | ||||
578 | {QStringLiteral("cmdline"), args.value(QStringLiteral("cmdline"))}, | ||||
579 | {QStringLiteral("direct-lut"), Var::Vector(DIRECT_LUT_SIZE)}, | ||||
580 | {QStringLiteral("direct-lut-size"), DIRECT_LUT_SIZE}, | ||||
581 | {QStringLiteral("ranges-luts"), Var::Vector()}, | ||||
582 | {QStringLiteral("ranges-lut-list"), Var::Vector()}, | ||||
583 | {QStringLiteral("ranges-lut-list-size"), 0}, | ||||
584 | }; | ||||
585 | | ||||
586 | // Fill direct-lut with widths of 0x00-0xFF | ||||
587 | for(unsigned i = 0; i < DIRECT_LUT_SIZE; ++i) { | ||||
588 | Q_ASSERT(widths[i].isValid()); | ||||
589 | data[QStringLiteral("direct-lut")].vec[i] = int(widths[i]); | ||||
590 | } | ||||
591 | | ||||
592 | static const QVector<CharacterWidth> widthsSortOrder = {CharacterWidth::NonPrintable, 2, CharacterWidth::Ambiguous, 0, 1}; | ||||
593 | const QMap<CharacterWidth, QVector<QPair<uint, uint>>> mergedRanges | ||||
594 | = mergedRangesFromWidths(widths, widthsSortOrder, {DIRECT_LUT_SIZE, CODE_POINTS_NUM}); | ||||
595 | | ||||
596 | // Find last non-empty ranges lut | ||||
597 | int lastWidthId = 0; | ||||
598 | for(int wi = widthsSortOrder.size() - 1; wi > 0; --wi) { | ||||
599 | if(mergedRanges.contains(widthsSortOrder[wi])) { | ||||
600 | lastWidthId = wi; | ||||
601 | break; | ||||
602 | } | ||||
603 | } | ||||
604 | // Create ranges-luts for all widths except last non-empty one and empty ones | ||||
605 | for(int wi = 0; lastWidthId != 0 && wi < lastWidthId; ++wi) { | ||||
606 | const CharacterWidth width = widthsSortOrder[wi]; | ||||
607 | auto currentMergedRangesIt = mergedRanges.find(width); | ||||
608 | if(currentMergedRangesIt == mergedRanges.end() || currentMergedRangesIt.value().isEmpty()) | ||||
609 | continue; | ||||
610 | const int size = mergedRanges[width].size(); | ||||
611 | const QString name = QString(QStringLiteral("LUT_%1")).arg(width.toString().toUpper()); | ||||
612 | data[QStringLiteral("ranges-luts")].vec.append(Var::Map { | ||||
613 | {QStringLiteral("name"), name}, | ||||
614 | {QStringLiteral("ranges"), Var::Vector()}, | ||||
615 | {QStringLiteral("size"), size}, | ||||
616 | }); | ||||
617 | data[QStringLiteral("ranges-lut-list")].vec.append(Var::Map { | ||||
618 | {QStringLiteral("width"), int(width)}, | ||||
619 | {QStringLiteral("name"), name}, | ||||
620 | {QStringLiteral("size"), size}, | ||||
621 | }); | ||||
622 | auto ¤tLut = data[QStringLiteral("ranges-luts")].vec.last()[QStringLiteral("ranges")].vec; | ||||
623 | for(const auto &range: *currentMergedRangesIt) { | ||||
624 | Q_ASSERT(range.first <= LAST_CODE_POINT); | ||||
625 | Q_ASSERT(range.second <= LAST_CODE_POINT); | ||||
626 | currentLut.append(Var(Var::Map {{QStringLiteral("first"), range.first}, {QStringLiteral("last"), range.second}})); | ||||
627 | } | ||||
628 | } | ||||
629 | data[QStringLiteral("ranges-lut-list")].vec.append(Var::Map { | ||||
630 | {QStringLiteral("width"), widthsSortOrder[lastWidthId].width()}, | ||||
631 | {QStringLiteral("name"), QStringLiteral("nullptr")}, | ||||
632 | {QStringLiteral("size"), 1}, | ||||
633 | }); | ||||
634 | data[QStringLiteral("ranges-lut-list-size")] = mergedRanges.size(); | ||||
635 | | ||||
636 | Template t(templateText); | ||||
637 | t.parse(); | ||||
638 | out << t.generate(data); | ||||
639 | | ||||
640 | return true; | ||||
641 | } | ||||
642 | | ||||
643 | bool list(QTextStream &out, const QVector<CharacterProperties> &props, const QVector<CharacterWidth> &widths, | ||||
644 | const QMap<QString, QString> &args) { | ||||
645 | Q_UNUSED(props); | ||||
646 | | ||||
647 | out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); | ||||
648 | for(uint cp = 1; cp <= LAST_CODE_POINT; ++cp) { | ||||
649 | out << QString::asprintf("%06X ; %2d\n", cp, int(widths[cp])); | ||||
650 | } | ||||
651 | | ||||
652 | return true; | ||||
653 | } | ||||
654 | | ||||
655 | bool ranges(QTextStream &out, const QVector<CharacterProperties> &props, const QVector<CharacterWidth> &widths, | ||||
656 | const QMap<QString, QString> &args) { | ||||
657 | Q_UNUSED(props); | ||||
658 | const auto ranges = rangesFromWidths(widths); | ||||
659 | | ||||
660 | out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); | ||||
661 | for(const WidthsRange &range: ranges) { | ||||
662 | if(range.cp.first != range.cp.last) | ||||
663 | out << QString::asprintf("%06X..%06X ; %2d\n", range.cp.first, range.cp.last, int(range.width)); | ||||
664 | else | ||||
665 | out << QString::asprintf("%06X ; %2d\n", range.cp.first, int(range.width)); | ||||
666 | } | ||||
667 | | ||||
668 | return true; | ||||
669 | } | ||||
670 | | ||||
671 | bool compactRanges(QTextStream &out, const QVector<CharacterProperties> &props, const QVector<CharacterWidth> &widths, | ||||
672 | const QMap<QString, QString> &args) { | ||||
673 | Q_UNUSED(props); | ||||
674 | static const QVector<CharacterWidth> widthsSortOrder = {CharacterWidth::NonPrintable, 2, CharacterWidth::Ambiguous, 0, 1}; | ||||
675 | const auto mergedRanges = mergedRangesFromWidths(widths, widthsSortOrder); | ||||
676 | | ||||
677 | out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); | ||||
678 | for(const int width: qAsConst(widthsSortOrder)) { | ||||
679 | const auto currentMergedRangesIt = mergedRanges.find(width); | ||||
680 | if(currentMergedRangesIt == mergedRanges.end() || currentMergedRangesIt.value().isEmpty()) | ||||
681 | continue; | ||||
682 | for(const auto &range: currentMergedRangesIt.value()) { | ||||
683 | if(range.first != range.second) | ||||
684 | out << QString::asprintf("%06X..%06X ; %2d\n", range.first, range.second, int(width)); | ||||
685 | else | ||||
686 | out << QString::asprintf("%06X ; %2d\n", range.first, int(width)); | ||||
687 | } | ||||
688 | } | ||||
689 | | ||||
690 | return true; | ||||
691 | } | ||||
692 | | ||||
693 | bool details(QTextStream &out, const QVector<CharacterProperties> &props, const QVector<CharacterWidth> &widths, | ||||
694 | const QMap<QString, QString> &args) { | ||||
695 | out.setFieldAlignment(QTextStream::AlignLeft); | ||||
696 | | ||||
697 | out << QStringLiteral("# generated with: ") << args.value(QStringLiteral("cmdline")) << QStringLiteral("\n"); | ||||
698 | out << QString::asprintf("#%-5s ; %-4s ; %-8s ; %-3s ; %-2s ; %-4s ; %-4s\n", | ||||
699 | "CP", "Wdth", "Cat", "EAW", "EM", "CstW", "Rule"); | ||||
700 | QMap<CharacterWidth, uint> widthStats; | ||||
701 | for(uint cp = 0; cp <= LAST_CODE_POINT; ++cp) { | ||||
702 | out << QString::asprintf("%06X ; %4d ; %08X ; %02X ; %02X ; %4d ; %d\n", cp, | ||||
703 | int8_t(widths[cp]), uint32_t(props[cp].category), uint8_t(props[cp].eastAsianWidth), | ||||
704 | uint8_t(props[cp].emoji), int8_t(props[cp].customWidth), props[cp].widthFromPropsRule); | ||||
705 | if(!widthStats.contains(widths[cp])) | ||||
706 | widthStats.insert(widths[cp], 0); | ||||
707 | widthStats[widths[cp]]++; | ||||
708 | } | ||||
709 | QMap<CharacterWidth, uint> rangesStats; | ||||
710 | const auto ranges = rangesFromWidths(widths); | ||||
711 | for(const auto &range: ranges) { | ||||
712 | if(!rangesStats.contains(range.width)) | ||||
713 | rangesStats.insert(range.width, 0); | ||||
714 | rangesStats[range.width]++; | ||||
715 | } | ||||
716 | out << QStringLiteral("# STATS") << endl; | ||||
717 | out << QStringLiteral("#") << endl; | ||||
718 | out << QStringLiteral("# Characters count for each width:") << endl; | ||||
719 | for(auto wi = widthStats.constBegin(); wi != widthStats.constEnd(); ++wi) { | ||||
720 | out << QString::asprintf("# %2d: %7d\n", int(wi.key()), widthStats[wi.key()]); | ||||
721 | } | ||||
722 | out << QStringLiteral("#") << endl; | ||||
723 | out << QStringLiteral("# Ranges count for each width:") << endl; | ||||
724 | int howmany = 0; | ||||
725 | for(auto wi = rangesStats.constBegin(); wi != rangesStats.constEnd(); ++wi) { | ||||
726 | if(howmany >= 20) break; | ||||
727 | howmany++; | ||||
728 | out << QString::asprintf("# %2d: %7d\n", int(wi.key()), rangesStats[wi.key()]); | ||||
729 | } | ||||
730 | | ||||
731 | return true; | ||||
732 | } | ||||
733 | } // namespace generators | ||||
734 | | ||||
735 | | ||||
736 | | ||||
737 | template <class EntryType> | ||||
738 | static void processInputFiles(QVector<CharacterProperties> &props, const QStringList &files, const QString &fileTypeName, | ||||
739 | void (*cb)(CharacterProperties &prop, const EntryType &entry)) { | ||||
740 | static const QRegularExpression PROTOCOL_RE(QStringLiteral(R"#(^[a-z]+://)#"), QRegularExpression::OptimizeOnFirstUsageOption); | ||||
741 | for(const QString &fileName: files) { | ||||
742 | qInfo().noquote() << QStringLiteral("Parsing as %1: %2").arg(fileTypeName).arg(fileName); | ||||
743 | QSharedPointer<QIODevice> source = nullptr; | ||||
744 | if(PROTOCOL_RE.match(fileName).hasMatch()) { | ||||
745 | source.reset(new KIODevice(QUrl(fileName))); | ||||
746 | } else { | ||||
747 | source.reset(new QFile(fileName)); | ||||
748 | } | ||||
749 | | ||||
750 | if(!source->open(QIODevice::ReadOnly)) { | ||||
751 | qCritical() << QStringLiteral("Could not open %1: %2").arg(fileName).arg(source->errorString()); | ||||
752 | exit(1); | ||||
753 | } | ||||
754 | UcdParser<EntryType> p(source.data()); | ||||
755 | while(p.hasNext()) { | ||||
756 | const auto &e = p.next(); | ||||
757 | for(uint cp = e.cp.first; cp <= e.cp.last; ++cp) { | ||||
758 | cb(props[cp], e); | ||||
759 | } | ||||
760 | } | ||||
761 | } | ||||
762 | } | ||||
763 | | ||||
764 | static const QString escapeCmdline(const QStringList &args) { | ||||
765 | static QString cmdline = QString(); | ||||
766 | if(!cmdline.isEmpty()) | ||||
767 | return cmdline; | ||||
768 | | ||||
769 | QTextStream stream(&cmdline, QIODevice::WriteOnly); | ||||
770 | | ||||
771 | // basename for command name | ||||
772 | stream << QFileInfo(args[0]).baseName(); | ||||
773 | for(auto it = args.begin() + 1; it != args.end(); ++it) { | ||||
774 | if(!it->startsWith(QLatin1Char('-'))) | ||||
775 | stream << QStringLiteral(" \"") << QString(*it).replace(QRegularExpression(QStringLiteral(R"(["`$\\])")), QStringLiteral(R"(\\\1)")) << '"'; | ||||
776 | else | ||||
777 | stream << ' ' << *it; | ||||
778 | } | ||||
779 | stream.flush(); | ||||
780 | return cmdline; | ||||
781 | } | ||||
782 | | ||||
// Keys of the conversion options map that is passed to widthFromProps().
enum ConvertOptions {
    // Width given to characters with East Asian Width "Ambiguous".
    AmbiguousWidthOpt,
    // Emoji property mask tested against a character's emoji properties.
    EmojiOpt,
};
787 | | ||||
// Character width assignment
//
// Rules (from highest to lowest priority, i.e. in the order they are checked;
// the first matching rule wins):
//
// * Local overlay
// * (not implemented) Character unique properties described in The Unicode Standard, Version 10.0
// * Unicode category Cc, Cs: -1
// * Emoji: 2
// * Unicode category Mn, Me, Cf: 0
// * East Asian Width W, F: 2
// * East Asian Width H, N, Na: 1
// * Unassigned/Undefined/Private Use: 1 (stored as CharacterWidth::Unassigned)
// * East Asian Width A: (varies)
//
// Note that Unassigned/Private Use is checked before East Asian Width A, so
// a private use code point that is also marked as Ambiguous is treated as
// unassigned.
//
// The list is loosely based on character width implementations in Vim 8.1
// and glibc 2.27. There are a few cases which could look better
// (decomposed Hangul, emoji with modifiers, etc.) with different widths,
// but interactive terminal programs (at least vim, zsh, everything based
// on glibc's wcwidth) would see their width as it is implemented now.
807 | static inline CharacterWidth widthFromProps(const CharacterProperties &props, uint cp, const QMap<ConvertOptions, int> &convertOpts) { | ||||
808 | CharacterWidth cw; | ||||
809 | auto &widthFromPropsRule = const_cast<uint8_t &>(props.widthFromPropsRule); | ||||
810 | if(props.customWidth.isValid()) { | ||||
811 | widthFromPropsRule = 1; | ||||
812 | cw = props.customWidth; | ||||
813 | | ||||
814 | } else if((CategoryProperty::Control | CategoryProperty::Surrogate) & props.category) { | ||||
815 | widthFromPropsRule = 2; | ||||
816 | cw = CharacterWidth::NonPrintable; | ||||
817 | | ||||
818 | } else if(convertOpts[EmojiOpt] & props.emoji && !(EmojiProperty::EmojiComponent & props.emoji)) { | ||||
819 | widthFromPropsRule = 3; | ||||
820 | cw = 2; | ||||
821 | | ||||
822 | } else if((CategoryProperty::NonspacingMark | CategoryProperty::EnclosingMark | CategoryProperty::Format) & props.category) { | ||||
823 | widthFromPropsRule = 4; | ||||
824 | cw = 0; | ||||
825 | | ||||
826 | } else if((EastAsianWidthProperty::Wide | EastAsianWidthProperty::Fullwidth) & props.eastAsianWidth) { | ||||
827 | widthFromPropsRule = 5; | ||||
828 | cw = 2; | ||||
829 | | ||||
830 | } else if((EastAsianWidthProperty::Halfwidth | EastAsianWidthProperty::Neutral | EastAsianWidthProperty::Narrow) & props.eastAsianWidth) { | ||||
831 | widthFromPropsRule = 6; | ||||
832 | cw = 1; | ||||
833 | | ||||
834 | } else if((CategoryProperty::Unassigned | CategoryProperty::PrivateUse) & props.category) { | ||||
835 | widthFromPropsRule = 7; | ||||
836 | cw = CharacterWidth::Unassigned; | ||||
837 | | ||||
838 | } else if((EastAsianWidthProperty::Ambiguous) & props.eastAsianWidth) { | ||||
839 | widthFromPropsRule = 8; | ||||
840 | cw = convertOpts[AmbiguousWidthOpt]; | ||||
841 | | ||||
842 | } else if(!props.category.isValid()) { | ||||
843 | widthFromPropsRule = 9; | ||||
844 | qWarning() << QStringLiteral("Code point U+%1 has invalid category - this should not happen. Assuming \"unassigned\"") | ||||
845 | .arg(cp, 4, 16, QLatin1Char('0')); | ||||
846 | cw = CharacterWidth::Unassigned; | ||||
847 | | ||||
848 | } else { | ||||
849 | widthFromPropsRule = 10; | ||||
850 | qWarning() << QStringLiteral("Code point U+%1 not classified - this should not happen. Assuming non-printable character") | ||||
851 | .arg(cp, 4, 16, QLatin1Char('0')); | ||||
852 | cw = CharacterWidth::NonPrintable; | ||||
853 | } | ||||
854 | | ||||
855 | return cw; | ||||
856 | } | ||||
857 | | ||||
858 | int main(int argc, char *argv[]) { | ||||
859 | static const QMap<QString, generators::GeneratorFunc> GENERATOR_FUNCS_MAP = { | ||||
860 | {QStringLiteral("code"), generators::code}, | ||||
861 | {QStringLiteral("compact-ranges"), generators::compactRanges}, | ||||
862 | {QStringLiteral("ranges"), generators::ranges}, | ||||
863 | {QStringLiteral("list"), generators::list}, | ||||
864 | {QStringLiteral("details"), generators::details}, | ||||
865 | {QStringLiteral("dummy"), [](QTextStream &, const QVector<CharacterProperties> &, const QVector<CharacterWidth> &, | ||||
866 | const QMap<QString, QString> &)->bool {return true;}}, | ||||
867 | }; | ||||
868 | qSetMessagePattern(QStringLiteral("%{message}")); | ||||
869 | | ||||
870 | QCoreApplication app(argc, argv); | ||||
871 | QCommandLineParser parser; | ||||
872 | parser.setApplicationDescription( | ||||
873 | QStringLiteral("\nUCD files to characters widths converter.\n") | ||||
874 | ); | ||||
875 | parser.addHelpOption(); | ||||
876 | parser.addOptions({ | ||||
877 | {{QStringLiteral("U"), QStringLiteral("unicode-data")}, | ||||
878 | QStringLiteral("Path or URL to UnicodeData.txt."), | ||||
879 | QStringLiteral("URL|file")}, | ||||
880 | {{QStringLiteral("A"), QStringLiteral("east-asian-width")}, | ||||
881 | QStringLiteral("Path or URL to EastAsianWidth.txt."), | ||||
882 | QStringLiteral("URL|file")}, | ||||
883 | {{QStringLiteral("E"), QStringLiteral("emoji-data")}, | ||||
884 | QStringLiteral("Path or URL to emoji-data.txt."), | ||||
885 | QStringLiteral("URL|file")}, | ||||
886 | {{QStringLiteral("W"), QStringLiteral("generic-width")}, | ||||
887 | QStringLiteral("Path or URL to generic file with width data. Accepts output from compact-ranges, ranges, list and details generator."), | ||||
888 | QStringLiteral("URL|file")}, | ||||
889 | | ||||
890 | {QStringLiteral("ambiguous-width"), | ||||
891 | QStringLiteral("Ambiguous characters width."), | ||||
892 | QStringLiteral("separate|1|2"), QString(QStringLiteral("%1")).arg(CharacterWidth::Ambiguous)}, | ||||
893 | {QStringLiteral("emoji"), | ||||
894 | QStringLiteral("Which emoji emoji subset is treated as emoji."), | ||||
895 | QStringLiteral("all|presentation"), QStringLiteral("presentation")}, | ||||
896 | | ||||
897 | {{QStringLiteral("g"), QStringLiteral("generator")}, | ||||
898 | QStringLiteral("Output generator (use \"-\" to list available generators). The code generator requires path to a template file."), | ||||
899 | QStringLiteral("generator[:template]"), QStringLiteral("details")}, | ||||
900 | }); | ||||
901 | parser.addPositionalArgument(QStringLiteral("output"), QStringLiteral("Output file (leave empty for stdout).")); | ||||
902 | parser.process(app); | ||||
903 | | ||||
904 | const QStringList unicodeDataFiles = parser.values(QStringLiteral("unicode-data")); | ||||
905 | const QStringList eastAsianWidthFiles = parser.values(QStringLiteral("east-asian-width")); | ||||
906 | const QStringList emojiDataFiles = parser.values(QStringLiteral("emoji-data")); | ||||
907 | const QStringList genericWidthFiles = parser.values(QStringLiteral("generic-width")); | ||||
908 | const QString ambiguousWidthStr = parser.value(QStringLiteral("ambiguous-width")); | ||||
909 | const QString emojiStr = parser.value(QStringLiteral("emoji")); | ||||
910 | const QString generator = parser.value(QStringLiteral("generator")); | ||||
911 | const QString outputFileName = parser.positionalArguments().value(0); | ||||
912 | | ||||
913 | QTextStream eout(stderr, QIODevice::WriteOnly); | ||||
914 | if(unicodeDataFiles.isEmpty() && eastAsianWidthFiles.isEmpty() && emojiDataFiles.isEmpty() && genericWidthFiles.isEmpty()) { | ||||
915 | eout << QStringLiteral("Input files not specified.") << endl << endl; | ||||
916 | parser.showHelp(1); | ||||
917 | } | ||||
918 | | ||||
919 | static QMap<ConvertOptions, int> convertOpts = { | ||||
920 | {AmbiguousWidthOpt, CharacterWidth::Ambiguous}, | ||||
921 | {EmojiOpt, EmojiProperty::EmojiPresentation}, | ||||
922 | }; | ||||
923 | | ||||
924 | if(emojiStr == QStringLiteral("presentation")) | ||||
925 | convertOpts[EmojiOpt] = EmojiProperty::EmojiPresentation; | ||||
926 | else if(emojiStr == QStringLiteral("all")) | ||||
927 | convertOpts[EmojiOpt] = EmojiProperty::Emoji; | ||||
928 | else { | ||||
929 | convertOpts[EmojiOpt] = EmojiProperty::EmojiPresentation; | ||||
930 | qWarning() << QStringLiteral("invalid emoji option value: %1. Assuming \"presentation\".").arg(emojiStr); | ||||
931 | } | ||||
932 | | ||||
933 | if(ambiguousWidthStr == QStringLiteral("separate")) | ||||
934 | convertOpts[AmbiguousWidthOpt] = CharacterWidth::Ambiguous; | ||||
935 | else if(ambiguousWidthStr == QStringLiteral("1")) | ||||
936 | convertOpts[AmbiguousWidthOpt] = 1; | ||||
937 | else if(ambiguousWidthStr == QStringLiteral("2")) | ||||
938 | convertOpts[AmbiguousWidthOpt] = 2; | ||||
939 | else { | ||||
940 | convertOpts[AmbiguousWidthOpt] = CharacterWidth::Ambiguous; | ||||
941 | qWarning() << QStringLiteral("Invalid ambiguous-width option value: %1. Assuming \"separate\".").arg(emojiStr); | ||||
942 | } | ||||
943 | | ||||
944 | const int sepPos = generator.indexOf(QLatin1Char(':')); | ||||
945 | const auto generatorName = generator.left(sepPos); | ||||
946 | const auto generatorParam = sepPos >= 0 ? generator.mid(sepPos + 1) : QString(); | ||||
947 | | ||||
948 | if(!GENERATOR_FUNCS_MAP.contains(generatorName)) { | ||||
949 | int status = 0; | ||||
950 | if(generatorName != QStringLiteral("-")) { | ||||
951 | status = 1; | ||||
952 | eout << QStringLiteral("Invalid output generator. Available generators:") << endl; | ||||
953 | } | ||||
954 | | ||||
955 | for(auto it = GENERATOR_FUNCS_MAP.constBegin(); it != GENERATOR_FUNCS_MAP.constEnd(); ++it) { | ||||
956 | eout << it.key() << endl; | ||||
957 | } | ||||
958 | exit(status); | ||||
959 | } | ||||
960 | auto generatorFunc = GENERATOR_FUNCS_MAP[generatorName]; | ||||
961 | | ||||
962 | QFile outFile; | ||||
963 | if(!outputFileName.isEmpty()) { | ||||
964 | outFile.setFileName(outputFileName); | ||||
965 | if(!outFile.open(QIODevice::WriteOnly)) { | ||||
966 | eout << QStringLiteral("Could not open file ") << outputFileName << QStringLiteral(": ") << outFile.errorString() << endl; | ||||
967 | exit(1); | ||||
968 | } | ||||
969 | } else { | ||||
970 | outFile.open(stdout, QIODevice::WriteOnly); | ||||
971 | } | ||||
972 | QTextStream out(&outFile); | ||||
973 | | ||||
974 | QVector<CharacterProperties> props(CODE_POINTS_NUM); | ||||
975 | | ||||
976 | processInputFiles<UnicodeDataEntry>( | ||||
977 | props, unicodeDataFiles, QStringLiteral("UnicodeData.txt"), | ||||
978 | [](CharacterProperties &prop, const UnicodeDataEntry &entry) { prop.category = entry.category(); }); | ||||
979 | | ||||
980 | processInputFiles<EastAsianWidthEntry>( | ||||
981 | props, eastAsianWidthFiles, QStringLiteral("EastAsianWidth.txt"), | ||||
982 | [](CharacterProperties &prop, const EastAsianWidthEntry &entry) { prop.eastAsianWidth = entry.eastAsianWidth(); }); | ||||
983 | | ||||
984 | processInputFiles<EmojiDataEntry>( | ||||
985 | props, emojiDataFiles, QStringLiteral("emoji-data.txt"), | ||||
986 | [](CharacterProperties &prop, const EmojiDataEntry &entry) { prop.emoji |= entry.emoji(); }); | ||||
987 | | ||||
988 | processInputFiles<GenericWidthEntry>( | ||||
989 | props, genericWidthFiles, QStringLiteral("generic width data"), | ||||
990 | [](CharacterProperties &prop, const GenericWidthEntry &entry) { prop.customWidth = entry.width(); }); | ||||
991 | | ||||
992 | qInfo() << "Generating character width data"; | ||||
993 | QVector<CharacterWidth> widths(CODE_POINTS_NUM); | ||||
994 | widths[0] = 0; // NULL character always has width 0 | ||||
995 | for(uint cp = 1; cp <= LAST_CODE_POINT; ++cp) { | ||||
996 | widths[cp] = widthFromProps(props[cp], cp, convertOpts); | ||||
997 | } | ||||
998 | | ||||
999 | const QMap<QString, QString> generatorArgs = { | ||||
1000 | {QStringLiteral("cmdline"), escapeCmdline(app.arguments())}, | ||||
1001 | {QStringLiteral("param"), generatorParam}, | ||||
1002 | {QStringLiteral("output"), outputFileName.isEmpty() ? QStringLiteral("<stdout>") : outputFileName}, | ||||
1003 | }; | ||||
1004 | | ||||
1005 | qInfo() << "Generating output"; | ||||
1006 | if(!generatorFunc(out, props, widths, generatorArgs)) { | ||||
1007 | parser.showHelp(1); | ||||
1008 | } | ||||
1009 | | ||||
1010 | return 0; | ||||
1011 | } |