diff --git a/src/kcharselect-data b/src/kcharselect-data
index 45aa3ce..e8ca45f 100644
Binary files a/src/kcharselect-data and b/src/kcharselect-data differ
diff --git a/src/kcharselect-generate-datafile.py b/src/kcharselect-generate-datafile.py
index e3f0fc4..54e3751 100755
--- a/src/kcharselect-generate-datafile.py
+++ b/src/kcharselect-generate-datafile.py
@@ -1,880 +1,882 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 #
 # This script generates a data file containing all Unicode information needed
 # by KCharSelect.
 #
 ##############################################################################
 # Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
 # Copyright (C) 2016 John Zaitseff <J.Zaitseff@zap.org.au>
 #
 # This script is free software; you can redistribute it and/or modify it under
 # the terms of the GNU Library General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # This script is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 # License for more details.
 #
 # You should have received a copy of the GNU Library General Public License
 # along with this library; see the file COPYING.LIB.  If not, write to the
 # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 # 02110-1301, USA.
 ##############################################################################
 #
 # The current directory must contain the following files that can be found at
 # http://www.unicode.org/Public/UNIDATA/:
 # - UnicodeData.txt
 # - Unihan_Readings.txt (you need to uncompress it from Unihan.zip)
 # - NamesList.txt
 # - Blocks.txt
 #
 # The generated file is named "kcharselect-data" and has to be put in
 # kwidgetsaddons/src.  Additionally a translation dummy named
 # "kcharselect-translation.cpp" is generated and has to be placed in the same
 # directory.
 #
 # FILE STRUCTURE
 #
 # The generated file is a binary file. The first 40 bytes are the header and
 # contain the position of each part of the file. Each entry is uint32.
 #
 # pos   content
 # 0     names strings begin
 # 4     names offsets begin
 # 8     details strings begin
 # 12    details offsets begin
 # 16    block strings begin
 # 20    block offsets begin
 # 24    section strings begin
 # 28    section offsets begin
 # 32    unihan strings begin
 # 36    unihan offsets begin
 #
 # The string parts always contain all strings in a row, followed by a 0x00
 # byte.  There is one exception: The data for seeAlso in details is only 2
 # bytes (as it is always _one_ Unicode character) and _not_ followed by a 0x00
 # byte.
 #
 # The offset parts contain entries with a fixed length.  Unicode characters
 # are always uint16 and offsets uint32.  Offsets are positions in the data
 # file.
 #
 # names_offsets:
 # each entry 6 bytes
 # 16bit: unicode
 # 32bit: offset to name in names_strings
 #
 # names_strings:
 # the first byte is the category (same values as QChar::Category),
 # directly followed by the character name (terminated by 0x00)
 #
 # nameslist_offsets:
 # char, alias, alias_count, note, note_count, approxEquiv, approxEquiv_count, equiv, equiv_count, seeAlso, seeAlso_count
 # 16    32     8            32    8           32           8                  32     8            32       8
 # => each entry 27 bytes
 #
 # blocks_offsets:
 # each entry 4 bytes
 # 16bit: start unicode
 # 16bit: end unicode
 # Note that there is no string offset.
 #
 # section_offsets:
 # each entry 4 bytes
 # 16bit: section offset
 # 16bit: block offset
 # Note that these offsets are _not_ positions in the data file but indexes.
 # For example 0x0403 means the fourth section includes the third block.
 #
 # unihan_offsets:
 # each entry 30 bytes
 # 16bit: unicode
 # 32bit: offset to unihan_strings for Definition
 # 32bit: offset to unihan_strings for Cantonese
 # 32bit: offset to unihan_strings for Mandarin
 # 32bit: offset to unihan_strings for Tang
 # 32bit: offset to unihan_strings for Korean
 # 32bit: offset to unihan_strings for JapaneseKun
 # 32bit: offset to unihan_strings for JapaneseOn
 
 from struct import *
 import sys
 import re
 import io
 
 # Based on http://www.unicode.org/charts/ (Unicode 9.0, plus later blocks such as Georgian Extended and Chess Symbols)
 sectiondata = '''
 SECTION European Scripts
 Basic Latin
 Latin-1 Supplement
 Latin Extended-A
 Latin Extended-B
 Latin Extended-C
 Latin Extended-D
 Latin Extended-E
 Latin Extended Additional
 Armenian
 Coptic
 Cyrillic
 Cyrillic Supplement
 Cyrillic Extended-A
 Cyrillic Extended-B
 Cyrillic Extended-C
 Georgian
 Georgian Supplement
+Georgian Extended
 Glagolitic
 Greek and Coptic
 Greek Extended
 Ogham
 Runic
 
 SECTION African Scripts
 Bamum
 Ethiopic
 Ethiopic Supplement
 Ethiopic Extended
 Ethiopic Extended-A
 NKo
 Tifinagh
 Vai
 
 SECTION Middle Eastern Scripts
 Arabic
 Arabic Supplement
 Arabic Extended-A
 Arabic Presentation Forms-A
 Arabic Presentation Forms-B
 Hebrew
 Mandaic
 Samaritan
 Syriac
 Syriac Supplement
 
 SECTION Central Asian Scripts
 Mongolian
 Phags-pa
 Tibetan
 
 SECTION South Asian Scripts
 Bengali
 Common Indic Number Forms
 Devanagari
 Devanagari Extended
 Gujarati
 Gurmukhi
 Kannada
 Lepcha
 Limbu
 Malayalam
 Meetei Mayek
 Meetei Mayek Extensions
 Ol Chiki
 Oriya
 Saurashtra
 Sinhala
 Syloti Nagri
 Tamil
 Telugu
 Thaana
 Vedic Extensions
 
 SECTION Southeast Asian Scripts
 Cham
 Kayah Li
 Khmer
 Khmer Symbols
 Lao
 Myanmar
 Myanmar Extended-A
 Myanmar Extended-B
 New Tai Lue
 Tai Le
 Tai Tham
 Tai Viet
 Thai
 
 SECTION Indonesia and Oceania Scripts
 Balinese
 Batak
 Buginese
 Buhid
 Hanunoo
 Javanese
 Rejang
 Sundanese
 Sundanese Supplement
 Tagalog
 Tagbanwa
 
 SECTION East Asian Scripts
 Bopomofo
 Bopomofo Extended
 CJK Unified Ideographs
 CJK Unified Ideographs Extension A
 CJK Compatibility
 CJK Compatibility Ideographs
 CJK Compatibility Forms
 CJK Radicals Supplement
 CJK Strokes
 CJK Symbols and Punctuation
 Enclosed CJK Letters and Months
 Hangul Jamo
 Hangul Jamo Extended-A
 Hangul Jamo Extended-B
 Hangul Compatibility Jamo
 Hangul Syllables
 Hiragana
 Ideographic Description Characters
 Kanbun
 Kangxi Radicals
 Katakana
 Katakana Phonetic Extensions
 Lisu
 Yi Radicals
 Yi Syllables
 
 SECTION American Scripts
 Cherokee
 Cherokee Supplement
 Unified Canadian Aboriginal Syllabics
 Unified Canadian Aboriginal Syllabics Extended
 
 SECTION Symbols
 General Punctuation
 Alchemical Symbols
 Braille Patterns
+Chess Symbols
 Control Pictures
 Currency Symbols
 Dingbats
 Domino Tiles
 Emoticons
 Enclosed Alphanumerics
 Enclosed Alphanumeric Supplement
 Enclosed Ideographic Supplement
 Mahjong Tiles
 Miscellaneous Symbols
 Miscellaneous Symbols and Pictographs
 Miscellaneous Technical
 Optical Character Recognition
 Ornamental Dingbats
 Playing Cards
 Small Form Variants
 Supplemental Punctuation
 Supplemental Symbols and Pictographs
 Transport and Map Symbols
 Vertical Forms
 Yijing Hexagram Symbols
 
 SECTION Mathematical Symbols
 Arrows
 Block Elements
 Box Drawing
 Geometric Shapes
 Geometric Shapes Extended
 Letterlike Symbols
 Mathematical Operators
 Miscellaneous Mathematical Symbols-A
 Miscellaneous Mathematical Symbols-B
 Miscellaneous Symbols and Arrows
 Number Forms
 Superscripts and Subscripts
 Supplemental Arrows-A
 Supplemental Arrows-B
 Supplemental Arrows-C
 Supplemental Mathematical Operators
 
 SECTION Phonetic Symbols
 IPA Extensions
 Modifier Tone Letters
 Phonetic Extensions
 Phonetic Extensions Supplement
 Spacing Modifier Letters
 
 SECTION Combining Diacritics
 Combining Diacritical Marks
 Combining Diacritical Marks Extended
 Combining Diacritical Marks Supplement
 Combining Diacritical Marks for Symbols
 Combining Half Marks
 
 SECTION Other
 Alphabetic Presentation Forms
 Halfwidth and Fullwidth Forms
 High Private Use Surrogates
 High Surrogates
 Low Surrogates
 Private Use Area
 Specials
 Variation Selectors
 '''
 
 # Maps a Unicode general-category abbreviation to its numeric code (same
 # values as QChar::Category).  The codes count up from 1 in the order the
 # abbreviations are listed below, so the order of this tuple is significant.
 categoryMap = {
     abbreviation: value
     for value, abbreviation in enumerate(
         (
             "Mn", "Mc", "Me",
             "Nd", "Nl", "No",
             "Zs", "Zl", "Zp",
             "Cc", "Cf", "Cs", "Co", "Cn",
             "Lu", "Ll", "Lt", "Lm", "Lo",
             "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
             "Sm", "Sc", "Sk", "So",
         ),
         start=1,
     )
 }
 
 
 # Temporary code point remapping
 #
 # Initial SMP support without needing a new data file format
 # - BMP U+Fxxx are remapped to U+Exxx
 # - SMP symbols U+1Fxxx are remapped to U+Fxxx
 # - Private Use Area is limited to U+F000 ... U+F8FF
 
 def remap(char):
     """Fold a hexadecimal code point string according to the temporary remapping.

     ``char`` is the code point written in hexadecimal.  If its value lies in
     U+E000..U+FFFF the first character of the string is replaced by "E"; if
     it lies in U+1F000..U+1FFFF the first character is dropped.  Any other
     string is returned unchanged.
     """
     value = int(char, 16)
     tail = char[1:]
     if 0xE000 <= value <= 0xFFFF:
         return "E" + tail
     if 0x1F000 <= value <= 0x1FFFF:
         return tail
     return char
 
 class Names:
     """Builds the "names strings" and "names offsets" parts of the data file.

     Entries are kept as ``[uni, name, category]`` lists, where ``uni`` is the
     code point as a hexadecimal string.  writeStrings() overwrites the
     ``name`` slot of every entry with the position at which its record was
     written; writeOffsets() then emits those positions.  The size
     calculation therefore has to run before writeStrings().
     """

     def __init__(self):
         self.names = []
         # Position of the single shared "<control>" record; -1 until written.
         self.controlpos = -1

     def addName(self, uni, name, category):
         """Register ``name`` and the numeric ``category`` for code point ``uni``."""
         self.names.append([uni, name, category])

     def calculateStringSize(self):
         """Return the number of bytes writeStrings() is going to emit.

         Each record is one category byte, the UTF-8 encoded name and a
         terminating 0x00.  The name "<control>" is only stored once, no
         matter how many entries carry it.
         """
         size = 0
         hadcontrol = False
         for entry in self.names:
             if entry[1] == "<control>":
                 if hadcontrol:
                     continue
                 hadcontrol = True
             # Count encoded bytes, not characters, so that the result
             # always matches what writeStrings() really writes.
             size += len(entry[1].encode("utf-8")) + 2
         return size

     def calculateOffsetSize(self):
         """Return the size of the offsets table (6 bytes per entry)."""
         return len(self.names)*6

     def writeStrings(self, out, pos):
         """Write all name records to ``out``, starting at file position ``pos``.

         Replaces the name of every entry by the position of its record and
         returns the position following the last byte written.
         """
         hadcontrol = False
         for entry in self.names:
             is_control = entry[1] == "<control>"
             if is_control and hadcontrol:
                 # Re-use the record written for the first "<control>" entry.
                 entry[1] = self.controlpos
                 continue
             record = pack("=b", entry[2]) + entry[1].encode("utf-8") + b"\0"
             out.write(record)
             entry[1] = pos
             if is_control:
                 self.controlpos = pos
                 hadcontrol = True
             # Advance by the bytes actually written.
             pos += len(record)
         return pos

     def writeOffsets(self, out, pos):
         """Write one (uint16 code point, uint32 position) pair per entry.

         Must be called after writeStrings().  Returns the position
         following the table.
         """
         for entry in self.names:
             out.write(pack("=HI", int(entry[0], 16), entry[1]))
             pos += 6
         return pos
 
 class Details:
     """Builds the "details strings" and "details offsets" parts of the data file.

     ``self.details`` maps a character (integer) to a dict that maps a
     category name to the list of values added for it.  Values are either
     text (str) or, for anything else, a number that is stored as uint16.
     """

     def __init__(self):
         self.details = {}

     def addEntry(self, char, category, text):
         """Append ``text`` to the ``category`` list of ``char``."""
         per_char = self.details.setdefault(char, {})
         per_char.setdefault(category, []).append(text)

     def calculateStringSize(self):
         """Return the number of bytes writeStrings() is going to emit.

         Text takes its UTF-8 length plus a terminating 0x00, every other
         value takes 2 bytes.
         """
         total = 0
         for categories in self.details.values():
             for values in categories.values():
                 for item in values:
                     total += len(item.encode("utf-8")) + 1 if type(item) is str else 2
         return total

     def calculateOffsetSize(self):
         """Return the size of the offsets table (27 bytes per character)."""
         return 27 * len(self.details)

     def writeStrings(self, out, pos):
         """Write every stored value to ``out``, starting at position ``pos``.

         Each value in ``self.details`` is replaced by the position it was
         written at.  Returns the position following the last byte written.
         """
         for categories in self.details.values():
             for values in categories.values():
                 for index, item in enumerate(values):
                     if type(item) is str:
                         raw = item.encode("utf-8") + b"\0"
                     else:
                         raw = pack("=H", item)
                     out.write(raw)
                     values[index] = pos
                     pos += len(raw)
         return pos

     def writeOffsets(self, out, pos):
         """Write one 27 byte record per character and return the new position.

         A record holds the character followed by (position of the first
         value, number of values) for alias, note, approxEquiv, equiv and
         seeAlso, in that order.  Categories without values are written as
         (0, 0).  Must be called after writeStrings().
         """
         for char, categories in self.details.items():
             fields = []
             for key in ("alias", "note", "approxEquiv", "equiv", "seeAlso"):
                 if key in categories:
                     positions = categories[key]
                     fields.append(positions[0])
                     fields.append(len(positions))
                 else:
                     fields.extend((0, 0))
             out.write(pack("=HIbIbIbIbIb", char, *fields))
             pos += 27

         return pos
 
 class SectionsBlocks:
     """Builds the block and section parts of the data file.

     ``self.blocks`` holds ``[begin, end, name]`` lists (begin/end being
     hexadecimal strings) and ``self.sections`` holds ``[section, block]``
     name pairs.  While the string tables are written the names inside
     these lists are replaced by indexes, which the offset tables then use.
     ``blockList`` and ``sectionList`` keep the untouched names.
     """

     def __init__(self):
         self.sections = []
         self.blocks = []
         self.blockList = []
         self.sectionList = []

     def addBlock(self, begin, end, name):
         """Register the block ``name`` covering ``begin``..``end``."""
         self.blocks.append([begin, end, name])
         self.blockList.append(name)

     def addSection(self, section, block):
         """Register that ``section`` contains the block named ``block``."""
         self.sections.append([section, block])
         if not section in self.sectionList:
             self.sectionList.append(section)

     def calculateBlockStringSize(self):
         """Return the number of bytes writeBlockStrings() is going to emit."""
         # Encoded bytes rather than characters, matching writeBlockStrings().
         return sum(len(block[2].encode("utf-8")) + 1 for block in self.blocks)

     def calculateBlockOffsetSize(self):
         """Return the size of the block offsets table (4 bytes per block)."""
         return len(self.blocks) * 4

     def calculateSectionStringSize(self):
         """Return the number of bytes writeSectionStrings() is going to emit.

         A section name is counted once for every run of consecutive entries
         that share it.
         """
         size = 0
         lastsection = ""
         for section in self.sections:
             if section[0] != lastsection:
                 # Encoded bytes rather than characters, matching
                 # writeSectionStrings().
                 size += len(section[0].encode("utf-8")) + 1
                 lastsection = section[0]
         return size

     def calculateSectionOffsetSize(self):
         """Return the size of the section offsets table (4 bytes per entry)."""
         return len(self.sections) * 4

     def writeBlockStrings(self, out, pos):
         """Write all block names, 0x00 terminated, and return the new position.

         Every section entry that refers to a block by name is switched to
         the block's index, and so is the name stored in ``self.blocks``.
         The script is terminated with exit status 1 if a block is not
         listed in any section.
         """
         for index, block in enumerate(self.blocks):
             name = block[2]
             encoded = name.encode("utf-8") + b"\0"
             out.write(encoded)
             found = False
             for section in self.sections:
                 if section[1] == name:
                     print("found", section)
                     section[1] = index
                     found = True
             if not found:
                 print("Error: Did not find any category for block \""+name+"\"")
                 sys.exit(1)
             block[2] = index
             pos += len(encoded)
         return pos

     def writeBlockOffsets(self, out, pos):
         """Write (uint16 begin, uint16 end) per block and return the new position."""
         for block in self.blocks:
             out.write(pack("=HH", int(block[0], 16), int(block[1], 16)))
             pos += 4
         return pos

     def writeSectionStrings(self, out, pos):
         """Write the section names, 0x00 terminated, and return the new position.

         A name is written whenever it differs from the name of the previous
         entry.  The section name of every entry is replaced by the index of
         the name written most recently.
         """
         lastsection = ""
         index = -1
         for section in self.sections:
             if section[0] != lastsection:
                 index += 1
                 lastsection = section[0]
                 encoded = section[0].encode("utf-8") + b"\0"
                 out.write(encoded)
                 pos += len(encoded)
             section[0] = index
         return pos

     def writeSectionOffsets(self, out, pos):
         """Write (uint16 section index, uint16 block index) per entry.

         Must be called after writeBlockStrings() and writeSectionStrings(),
         which turn the names into indexes.  Returns the new position.
         """
         for section in self.sections:
             out.write(pack("=HH", section[0], section[1]))
             pos += 4
         return pos

     def getBlockList(self):
         """Return the block names in the order they were added."""
         return self.blockList

     def getSectionList(self):
         """Return the distinct section names in order of first appearance."""
         return self.sectionList
 
 class Unihan:
     """Builds the "unihan strings" and "unihan offsets" parts of the data file.

     ``self.unihan`` maps a character (integer) to a list of seven slots:
     Definition, Cantonese, Mandarin, Tang, Korean, JapaneseKun and
     JapaneseOn.  A slot is None until a value has been added for it.
     """

     def __init__(self):
         self.unihan = {}

     def addUnihan(self, uni, category, value):
         """Store ``value`` for the hexadecimal code point ``uni``.

         Categories other than the seven handled ones are ignored.
         """
         uni = int(uni, 16)
         # The position of a category in this tuple is its slot number.
         fields = ("kDefinition", "kCantonese", "kMandarin", "kTang",
                   "kKorean", "kJapaneseKun", "kJapaneseOn")
         if category not in fields:
             return
         if uni not in self.unihan:
             self.unihan[uni] = [None] * 7
         self.unihan[uni][fields.index(category)] = value

     def calculateStringSize(self):
         """Return the number of bytes writeStrings() is going to emit."""
         return sum(
             len(entry.encode("utf-8")) + 1
             for readings in self.unihan.values()
             for entry in readings
             if entry is not None
         )

     def calculateOffsetSize(self):
         """Return the size of the offsets table (30 bytes per character)."""
         return 30 * len(self.unihan)

     def writeStrings(self, out, pos):
         """Write every filled slot as UTF-8 text terminated by 0x00.

         The slot content is replaced by the position it was written at.
         Returns the position following the last byte written.
         """
         for readings in self.unihan.values():
             for slot in range(7):
                 text = readings[slot]
                 if text is None:
                     continue
                 raw = text.encode("utf-8") + b"\0"
                 out.write(raw)
                 readings[slot] = pos
                 pos += len(raw)
         return pos

     def writeOffsets(self, out, pos):
         """Write one 30 byte record per character and return the new position.

         A record is the character (uint16) followed by seven uint32
         positions; an empty slot is written as 0.  Must be called after
         writeStrings().
         """
         for char, readings in self.unihan.items():
             positions = [0 if readings[slot] is None else readings[slot]
                          for slot in range(7)]
             out.write(pack("=H7I", char, *positions))
             pos += 30
         return pos
 
 class Parser:
     """Readers for the input files; each method feeds one collector object.

     All methods take an iterable of text lines.  Code points are passed
     through remap() and anything that still needs more than four
     hexadecimal digits afterwards is skipped.
     """

     def parseUnicodeData(self, inUnicodeData, names):
         """Feed code point, name and category of every line to ``names``.

         Lines are expected to start with three ';'-separated fields: code
         point, name and general category abbreviation.
         """
         regexp = re.compile(r'^([^;]+);([^;]+);([^;]+)')
         for line in inUnicodeData:
             # rstrip() instead of [:-1]: a final line without a newline
             # must not lose its last character.
             m = regexp.match(line.rstrip("\n"))
             if not m:
                 continue
             uni = remap(m.group(1))
             if len(uni) > 4:
                 continue
             names.addName(uni, m.group(2), categoryMap[m.group(3)])

     def parseDetails(self, inNamesList, details):
         """Feed the annotations of each character to ``details``.

         A line starting with hexadecimal digits selects the current
         character; the indented lines that follow are stored under
         "alias" (=), "note" (*), "approxEquiv" (#), "equiv" (:) or
         "seeAlso" (x).  Lines starting with '@' are ignored.  A seeAlso
         target is only stored when it fits in 16 bits after remapping.
         """
         invalidRegexp = re.compile(r'^@')
         unicodeRegexp = re.compile(r'^([0-9A-F]+)')
         # Checked in this order; the first match decides the category.
         textRegexps = (
             ("alias", re.compile(r'^\s+=\s+(.+)$')), #equal
             ("note", re.compile(r'^\s+\*\s+(.+)$')), #star
             ("approxEquiv", re.compile(r'^\s+#\s+(.+)$')), #pound
             ("equiv", re.compile(r'^\s+:\s+(.+)$')), #colon
         )
         seeAlsoRegexps = (
             re.compile(r'^\s+x\s+.*\s([0-9A-F]{4,6})\)$'), #ex
             re.compile(r'^\s+x\s+([0-9A-F]{4,6})$'), #ex
         )

         drop = False
         currChar = 0

         for line in inNamesList:
             line = line.rstrip("\n")
             if invalidRegexp.match(line):
                 continue

             m = unicodeRegexp.match(line)
             if m:
                 mg1 = remap(m.group(1))
                 currChar = int(mg1, 16)
                 # Decide for every character anew.  Characters that
                 # remap() folds back into range follow out-of-range ones
                 # in the input, so the flag must be cleared again or
                 # their annotations are lost.
                 drop = len(mg1) > 4
                 continue
             if drop:
                 continue

             for category, regexp in textRegexps:
                 m = regexp.match(line)
                 if m:
                     details.addEntry(currChar, category, m.group(1))
                     break
             else:
                 for regexp in seeAlsoRegexps:
                     m = regexp.match(line)
                     if m:
                         value = int(remap(m.group(1)), 16)
                         if value < 0x10000:
                             details.addEntry(currChar, "seeAlso", value)
                         break

     def parseBlocks(self, inBlocks, sectionsBlocks):
         """Feed every "BEGIN..END; Name" line to ``sectionsBlocks``.

         Both ends of the range are remapped; the block is skipped if the
         remapped begin needs more than four hexadecimal digits.
         """
         regexp = re.compile(r'^([0-9A-F]+)\.\.([0-9A-F]+); (.+)$')
         for line in inBlocks:
             m = regexp.match(line.rstrip("\n"))
             if not m:
                 continue
             m1 = remap(m.group(1))
             m2 = remap(m.group(2))
             if len(m1) > 4:
                 continue
             sectionsBlocks.addBlock(m1, m2, m.group(3))

     def parseSections(self, inSections, sectionsBlocks):
         """Feed (section, block) pairs to ``sectionsBlocks``.

         A line whose first word is "SECTION" starts a new section named by
         the rest of the line; every other non-empty line is a block of the
         current section.  The script is terminated with exit status 1 if a
         block line appears before any section.
         """
         currSection = ""
         for line in inSections:
             line = line.rstrip("\n")
             if len(line) == 0:
                 continue
             if line.split(" ")[0] == "SECTION":
                 currSection = line[8:]
             elif currSection != "":
                 sectionsBlocks.addSection(currSection, line)
             else:
                 print("error in data file")
                 sys.exit(1)

     def parseUnihan(self, inUnihan, unihan):
         """Feed every "U+XXXX<ws>field<ws>value" line to ``unihan``.

         Progress output is printed once per 100000 input lines.
         """
         regexp = re.compile(r'^U\+([0-9A-F]+)\s+([^\s]+)\s+(.+)$')
         count = 0
         for line in inUnihan:
             if count % 100000 == 0:
                 print("\b."); sys.stdout.flush()
             count += 1
             m = regexp.match(line.rstrip("\n"))
             if not m:
                 continue
             uni = remap(m.group(1))
             if len(uni) <= 4:
                 unihan.addUnihan(uni, m.group(2), m.group(3))
 
 def writeTranslationDummy(out, data):
     """Write the translation dummy source file to the binary stream ``out``.

     The licence header is written first.  ``data`` is a sequence of groups;
     for every entry of ``group[1]`` one QT_TRANSLATE_NOOP3 line is written
     with the entry as text and ``group[0]`` as its comment.
     """
     header = b"""/* This file is part of the KDE libraries
 
    Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
    Copyright (C) 2016 John Zaitseff <J.Zaitseff@zap.org.au>
 
    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.
 
    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
    License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
    MA 02110-1301, USA.
 
    This file is autogenerated by kcharselect/kcharselect-generate-datafile.py
 */\n\n"""
     out.write(header)
     template = b"QT_TRANSLATE_NOOP3(\"KCharSelectData\", \"%s\", \"%s\");\n"
     for group in data:
         for entry in group[1]:
             out.write(template % (entry.encode("utf-8"), group[0].encode("utf-8")))
 
 # Main script: parse the input files found in the current directory, then
 # write "kcharselect-data" and "kcharselect-translation.cpp".
 #
 # All files are handled by "with" blocks so that they are closed (and the
 # output flushed) deterministically.  The output files are only opened once
 # the input has been parsed, so a missing or unreadable input file no longer
 # truncates an existing data file.
 
 if calcsize('=H') != 2 or calcsize('=I') != 4:
     print("Error: Sizes of ushort and uint are not 16 and 32 bit as expected")
     sys.exit(1)
 
 names = Names()
 details = Details()
 sectionsBlocks = SectionsBlocks()
 unihan = Unihan()
 
 parser = Parser()
 
 inSections = io.StringIO(sectiondata)
 
 # The input is decoded explicitly as UTF-8 instead of relying on the locale
 # (NOTE(review): assumes current UNIDATA files, which are UTF-8 or ASCII).
 with open("UnicodeData.txt", "r", encoding="utf-8") as inUnicodeData, \
      open("NamesList.txt", "r", encoding="utf-8") as inNamesList, \
      open("Blocks.txt", "r", encoding="utf-8") as inBlocks, \
      open("Unihan_Readings.txt", "r", encoding="utf-8") as inUnihan:
     print("========== parsing files ===================")
     parser.parseUnicodeData(inUnicodeData, names)
     print("."); sys.stdout.flush()
     parser.parseDetails(inNamesList, details)
     print("\b."); sys.stdout.flush()
     parser.parseBlocks(inBlocks, sectionsBlocks)
     print("\b."); sys.stdout.flush()
     parser.parseSections(inSections, sectionsBlocks)
     print("\b."); sys.stdout.flush()
     parser.parseUnihan(inUnihan, unihan)
     print("\b."); sys.stdout.flush()
 
 print("done.")
 
 with open("kcharselect-data", "wb") as out:
     #write header, size: 40 bytes
     # Each header field is the begin of one part; a part begins where the
     # previous one ends, so the sizes are accumulated in file order.
     print("========== writing header ==================")
     out.write(pack("=I", 40))
     print("names strings begin", 40)
 
     namesOffsetBegin = names.calculateStringSize() + 40
     out.write(pack("=I", namesOffsetBegin))
     print("names offsets begin", namesOffsetBegin)
 
     detailsStringBegin = namesOffsetBegin + names.calculateOffsetSize()
     out.write(pack("=I", detailsStringBegin))
     print("details strings begin", detailsStringBegin)
 
     detailsOffsetBegin = detailsStringBegin + details.calculateStringSize()
     out.write(pack("=I", detailsOffsetBegin))
     print("details offsets begin", detailsOffsetBegin)
 
     blocksStringBegin = detailsOffsetBegin + details.calculateOffsetSize()
     out.write(pack("=I", blocksStringBegin))
     print("block strings begin", blocksStringBegin)
 
     blocksOffsetBegin = blocksStringBegin + sectionsBlocks.calculateBlockStringSize()
     out.write(pack("=I", blocksOffsetBegin))
     print("block offsets begin", blocksOffsetBegin)
 
     sectionStringBegin = blocksOffsetBegin + sectionsBlocks.calculateBlockOffsetSize()
     out.write(pack("=I", sectionStringBegin))
     print("section strings begin", sectionStringBegin)
 
     sectionOffsetBegin = sectionStringBegin + sectionsBlocks.calculateSectionStringSize()
     out.write(pack("=I", sectionOffsetBegin))
     print("section offsets begin", sectionOffsetBegin)
 
     unihanStringBegin = sectionOffsetBegin + sectionsBlocks.calculateSectionOffsetSize()
     out.write(pack("=I", unihanStringBegin))
     print("unihan strings begin", unihanStringBegin)
 
     unihanOffsetBegin = unihanStringBegin + unihan.calculateStringSize()
     out.write(pack("=I", unihanOffsetBegin))
     print("unihan offsets begin", unihanOffsetBegin)
 
     end = unihanOffsetBegin + unihan.calculateOffsetSize()
     print("end should be", end)
 
     # The data follows the 40 byte header.
     pos = 40
 
     print("========== writing data ====================")
 
     pos = names.writeStrings(out, pos)
     print("names strings written, position", pos)
     pos = names.writeOffsets(out, pos)
     print("names offsets written, position", pos)
     pos = details.writeStrings(out, pos)
     print("details strings written, position", pos)
     pos = details.writeOffsets(out, pos)
     print("details offsets written, position", pos)
     pos = sectionsBlocks.writeBlockStrings(out, pos)
     print("block strings written, position", pos)
     pos = sectionsBlocks.writeBlockOffsets(out, pos)
     print("block offsets written, position", pos)
     pos = sectionsBlocks.writeSectionStrings(out, pos)
     print("section strings written, position", pos)
     pos = sectionsBlocks.writeSectionOffsets(out, pos)
     print("section offsets written, position", pos)
     pos = unihan.writeStrings(out, pos)
     print("unihan strings written, position", pos)
     pos = unihan.writeOffsets(out, pos)
     print("unihan offsets written, position", pos)
 
 # The header was computed from predicted sizes.  If the position reached
 # while writing differs, the offsets stored in the header are wrong.
 if pos != end:
     print("Error: wrote up to position", pos, "but the header was calculated for", end)
     sys.exit(1)
 
 print("========== writing translation dummy  ======")
 translationData = [["KCharSelect section name", sectionsBlocks.getSectionList()], ["KCharselect unicode block name",sectionsBlocks.getBlockList()]]
 with open("kcharselect-translation.cpp", "wb") as outTranslationDummy:
     writeTranslationDummy(outTranslationDummy, translationData)
 print("done. make sure to copy both kcharselect-data and kcharselect-translation.cpp.")
diff --git a/src/kcharselectdata.cpp b/src/kcharselectdata.cpp
index 57f343b..f54ffc4 100644
--- a/src/kcharselectdata.cpp
+++ b/src/kcharselectdata.cpp
@@ -1,1046 +1,1046 @@
 /* This file is part of the KDE libraries
 
    Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
 
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.
 
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
 */
 
 #include "kcharselectdata_p.h"
 
 #include <QCoreApplication>
 #include <QRegularExpression>
 #include <QStringList>
 #include <QFile>
 #include <qendian.h>
 #include <QFutureInterface>
 #include <QRunnable>
 #include <QThreadPool>
 
 #include <string.h>
 #include <qstandardpaths.h>
 
 /*
  * Constants for Hangul (de)composition, see UAX #15.
  * SBase is subtracted from a code point to obtain its syllable index.
  * LBase, VBase and TBase are the bases that decomposition() adds to the
  * leading, vowel and trailing jamo indices; LCount, VCount and TCount are
  * the sizes of those three index ranges (and of the JAMO_*_TABLE arrays).
  * NCount is the number of syllables per leading jamo, SCount the total
  * number of composed syllables.
  */
 #define SBase 0xAC00
 #define LBase 0x1100
 #define VBase 0x1161
 #define TBase 0x11A7
 #define LCount 19
 #define VCount 21
 #define TCount 28
 #define NCount (VCount * TCount)
 #define SCount (LCount * NCount)
 
 /*
  * Runnable that builds the search index with KCharSelectData::createIndex()
  * and publishes it through the QFutureInterface<Index> it also derives from.
  */
 class RunIndexCreation : public QFutureInterface<Index>, public QRunnable
 {
 public:
     // Remembers the data object and a copy of the data file contents that
     // run() will pass to createIndex().
     RunIndexCreation(KCharSelectData *data, const QByteArray &dataFile)
         : m_data(data), m_dataFile(dataFile)
     {
     }
 
     // Reports the future as started, queues this object on the global thread
     // pool and returns the future the index will be reported to.
     QFuture<Index> start()
     {
         setRunnable(this);
         reportStarted();
         // The future is fetched before the runnable is queued.
         // NOTE(review): presumably because the pool may delete this object
         // once run() has returned - confirm against QRunnable::autoDelete().
         QFuture<Index> f = this->future();
         QThreadPool::globalInstance()->start(this);
         return f;
     }
 
     // Builds the index, reports it as the result and marks the future finished.
     void run() override
     {
         Index index = m_data->createIndex(m_dataFile);
         reportResult(index);
         reportFinished();
     }
 
 private:
     KCharSelectData *m_data;   // object whose createIndex() is invoked
     QByteArray m_dataFile;     // data file contents handed to createIndex()
 };
 
 // Name fragments used by name() to spell out Hangul syllable names.
 // Every entry is at most three letters plus the terminating NUL, hence [4].
 
 // Leading-consonant fragments, indexed by LIndex (LCount entries).
 static const char JAMO_L_TABLE[][4] = {
     "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
     "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
 };
 
 // Vowel fragments, indexed by VIndex (VCount entries).
 static const char JAMO_V_TABLE[][4] = {
     "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
     "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
     "YU", "EU", "YI", "I"
 };
 
 // Trailing-consonant fragments, indexed by TIndex (TCount entries);
 // entry 0 is empty for syllables without a trailing consonant.
 static const char JAMO_T_TABLE[][4] = {
     "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
     "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
     "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
 };
 
 /*
  * Loads the kcharselect-data file into dataFile on first use.
  *
  * Returns true when the data is (already) available. Returns false, leaving
  * dataFile empty, when the file cannot be opened, is shorter than the
  * 40-byte header, or contains a number of blocks that matches no known
  * code point remapping. On a successful first load the search index
  * creation is started on the global thread pool.
  */
 bool KCharSelectData::openDataFile()
 {
     if (!dataFile.isEmpty()) {
         // Already loaded by an earlier call.
         return true;
     } else {
         QFile file(QStandardPaths::locate(QStandardPaths::GenericDataLocation, QStringLiteral("kf5/kcharselect/kcharselect-data")));
         if (!file.open(QIODevice::ReadOnly)) {
             return false;
         }
         dataFile = file.readAll();
         file.close();
         // The header fields read by this class end at byte 40.
         if (dataFile.size() < 40) {
             dataFile.clear();
             return false;
         }
         const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData());
         // Header offsets 20 and 24 delimit the block table; each entry is 4 bytes.
         const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 20);
         const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 24);
         uint blocks = (offsetEnd - offsetBegin) / 4;
         // The number of blocks is used to detect which remapping the file uses.
         if (blocks <= 167) { // maximum possible number of blocks in BMP
             // no remapping
             remapType = -1;
-        } else if (blocks >= 174 && blocks <= 175) {
+        } else if (blocks >= 174 && blocks <= 177) {
             // remapping introduced in 5.25
             remapType = 0;
         } else {
             // unknown remapping, abort
             dataFile.clear();
             return false;
         }
         // Build the search index in the background; getMatchingChars() waits for it.
         futureIndex = (new RunIndexCreation(this, dataFile))->start();
         return true;
     }
 }
 
 // Temporary remapping code points <-> 16 bit database codes
 // See kcharselect-generate-datafile.py for details
 
 // Converts a Unicode code point into the 16 bit code used inside the data
 // file. Returns 0xFFFF for code points the data file cannot represent.
 quint16 KCharSelectData::mapCodePointToDataBase(uint code) const
 {
     const bool remapped = (remapType == 0);
     if (remapped && code >= 0xE000 && code <= 0xEFFF) {
         // this range is not stored when remapping is active
         return 0xFFFF;
     }
     if (remapped && code >= 0xF000 && code <= 0xFFFF) {
         // U+F000..U+FFFF is stored as 0xE000..0xEFFF
         return code - 0x1000;
     }
     if (remapped && code >= 0x1F000 && code <= 0x1FFFF) {
         // U+1F000..U+1FFFF is stored as 0xF000..0xFFFF
         return code - 0x10000;
     }
     if (code < 0x10000) {
         return code;
     }
     return 0xFFFF;
 }
 
 // Converts a 16 bit code read from the data file back into a Unicode code
 // point; the inverse of mapCodePointToDataBase() for stored codes.
 uint KCharSelectData::mapDataBaseToCodePoint(quint16 code) const
 {
     if (remapType != 0 || code < 0xE000) {
         // no remapping active, or a code below the remapped area
         return code;
     }
     // 0xE000..0xEFFF holds U+F000..U+FFFF, 0xF000..0xFFFF holds U+1F000..U+1FFFF
     const uint shift = (code <= 0xEFFF) ? 0x1000 : 0x10000;
     return code + shift;
 }
 
 // Returns the byte offset (within dataFile) of the 27-byte details record of
 // code point c, or 0 when c has no record or cannot be represented in the
 // data file. The most recent lookup is cached in function-local statics.
 quint32 KCharSelectData::getDetailIndex(uint c) const
 {
     const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData());
     // Convert from little-endian, so that this code works on PPC too.
     // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
     const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 12);
     const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 16);
 
     int min = 0;
     int mid;
     int max = ((offsetEnd - offsetBegin) / 27) - 1;
 
     quint16 unicode = mapCodePointToDataBase(c);
     if (unicode == 0xFFFF) {
         return 0;
     }
 
     // The validity flag keeps the zero-initialised cache from answering the
     // very first lookup of database code 0 with a bogus "no details" result.
     static bool most_recent_valid = false;
     static quint16 most_recent_searched;
     static quint32 most_recent_result;
 
     if (most_recent_valid && unicode == most_recent_searched) {
         return most_recent_result;
     }
 
     most_recent_searched = unicode;
     most_recent_valid = true;
 
     // Binary search over the records, which start with their 16 bit code.
     while (max >= min) {
         mid = (min + max) / 2;
         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid * 27);
         if (unicode > midUnicode) {
             min = mid + 1;
         } else if (unicode < midUnicode) {
             max = mid - 1;
         } else {
             most_recent_result = offsetBegin + mid * 27;
 
             return most_recent_result;
         }
     }
 
     most_recent_result = 0;
     return 0;
 }
 
 // Formats code in the given base using upper-case digits, left-padded with
 // '0' to at least length characters and preceded by prefix.
 QString KCharSelectData::formatCode(uint code, int length, const QString &prefix, int base)
 {
     const QString digits = QString::number(code, base).toUpper();
     // rightJustified() only pads; longer numbers are kept in full
     return prefix + digits.rightJustified(length, QLatin1Char('0'));
 }
 
 // Returns all code points of the block with the given index, in ascending
 // database order. Returns an empty vector when the data file is unavailable
 // or the index is outside the block table.
 QVector<uint> KCharSelectData::blockContents(int block)
 {
     if (!openDataFile()) {
         return QVector<uint>();
     }
 
     const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 20);
     const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 24);
 
     int max = ((offsetEnd - offsetBegin) / 4) - 1;
 
     QVector<uint> res;
 
     // A negative index would read in front of the block table.
     if (block < 0 || block > max) {
         return res;
     }
 
     // Each 4-byte entry holds the first and the last database code of the block.
     quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block * 4);
     quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block * 4 + 2);
 
     while (unicodeBegin < unicodeEnd) {
         res.append(mapDataBaseToCodePoint(unicodeBegin));
         unicodeBegin++;
     }
     // The last code is appended outside the loop: with "<=" as the loop
     // condition the 16 bit counter would wrap around when unicodeEnd == 0xffff.
     res.append(mapDataBaseToCodePoint(unicodeBegin));
 
     return res;
 }
 
 // Returns the indices of all blocks assigned to the given section, in table
 // order. Returns an empty vector when the data file is unavailable.
 QVector<int> KCharSelectData::sectionContents(int section)
 {
     QVector<int> blocks;
     if (!openDataFile()) {
         return blocks;
     }
 
     const uchar *bytes = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 tableBegin = qFromLittleEndian<quint32>(bytes + 28);
     const quint32 tableEnd = qFromLittleEndian<quint32>(bytes + 32);
     const int entryCount = (tableEnd - tableBegin) / 4;
 
     if (section >= entryCount) {
         return blocks;
     }
 
     // Each 4-byte entry is a (section, block) pair of 16 bit values.
     const uchar *entry = bytes + tableBegin;
     for (int n = 0; n < entryCount; ++n, entry += 4) {
         if (qFromLittleEndian<quint16>(entry) == section) {
             blocks.append(qFromLittleEndian<quint16>(entry + 2));
         }
     }
 
     return blocks;
 }
 
 // Returns the translated names of all sections, in data file order.
 // Returns an empty list when the data file is unavailable.
 QStringList KCharSelectData::sectionList()
 {
     QStringList names;
     if (!openDataFile()) {
         return names;
     }
 
     const char *text = dataFile.constData();
     const uchar *header = reinterpret_cast<const uchar *>(text);
     const quint32 stringBegin = qFromLittleEndian<quint32>(header + 24);
     const quint32 stringEnd = qFromLittleEndian<quint32>(header + 28);
 
     // The names are stored back to back as NUL-terminated strings.
     for (quint32 pos = stringBegin; pos < stringEnd; pos += qstrlen(text + pos) + 1) {
         names.append(QCoreApplication::translate("KCharSelectData", text + pos, "KCharSelect section name"));
     }
 
     return names;
 }
 
 // Returns the translated name of the block that blockIndex() reports for c.
 QString KCharSelectData::block(uint c)
 {
     const int index = blockIndex(c);
     return blockName(index);
 }
 
 // Returns the name of the section containing the block of code point c.
 QString KCharSelectData::section(uint c)
 {
     const int containingBlock = blockIndex(c);
     const int containingSection = sectionIndex(containingBlock);
     return sectionName(containingSection);
 }
 
 /*
  * Returns the name of code point c.
  *
  * Names for noncharacters, CJK ideographs, Hangul syllables, surrogates and
  * private use code points are computed from the code point itself; all other
  * names are looked up in the names table of the data file. Returns a null
  * QString when the data file is unavailable, and "<not assigned>" when the
  * table has no entry for c.
  */
 QString KCharSelectData::name(uint c)
 {
     if (!openDataFile()) {
         return QString();
     }
 
     if ((c & 0xFFFE) == 0xFFFE || (c >= 0xFDD0 && c <= 0xFDEF)) {
         // the last two code points of every plane and U+FDD0..U+FDEF
         return QCoreApplication::translate("KCharSelectData", "<noncharacter>");
     } else if ((c >= 0x3400 && c <= 0x4DBF)
             || (c >= 0x4E00 && c <= 0x9FFF)
             || (c >= 0x20000 && c <= 0x2F7FF)) {
         return QStringLiteral("CJK UNIFIED IDEOGRAPH-") + formatCode(c, 4, QString());
     } else if (c >= 0xAC00 && c <= 0xD7AF) {
         /* compute hangul syllable name as per UAX #15 */
         int SIndex = c - SBase;
         int LIndex, VIndex, TIndex;
 
         // code points of this range beyond the last composed syllable have no name
         if (SIndex < 0 || SIndex >= SCount) {
             return QString();
         }
 
         LIndex = SIndex / NCount;
         VIndex = (SIndex % NCount) / TCount;
         TIndex = SIndex % TCount;
 
         return QLatin1String("HANGUL SYLLABLE ") + QLatin1String(JAMO_L_TABLE[LIndex])
                + QLatin1String(JAMO_V_TABLE[VIndex]) + QLatin1String(JAMO_T_TABLE[TIndex]);
     } else if (c >= 0xD800 && c <= 0xDB7F) {
         return QCoreApplication::translate("KCharSelectData", "<Non Private Use High Surrogate>");
     } else if (c >= 0xDB80 && c <= 0xDBFF) {
         return QCoreApplication::translate("KCharSelectData", "<Private Use High Surrogate>");
     } else if (c >= 0xDC00 && c <= 0xDFFF) {
         return QCoreApplication::translate("KCharSelectData", "<Low Surrogate>");
     } else if ((c >= 0xE000 && c <= 0xF8FF) || c >= 0xF0000) {
         return QCoreApplication::translate("KCharSelectData", "<Private Use>");
     } else if ((c >= 0xF900 && c <= 0xFAFF) || (c >= 0x2F800 && c <= 0x2FFFF)) {
         return QStringLiteral("CJK COMPATIBILITY IDEOGRAPH-") + formatCode(c, 4, QString());
     }
     quint16 unicode = mapCodePointToDataBase(c);
     if (unicode == 0xFFFF) {
         // not representable in the data file, so no name can be looked up
         return QStringLiteral("NON-BMP-CHARACTER-") + formatCode(c, 4, QString());
     } else {
         const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData());
         const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 4);
         const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 8);
 
         int min = 0;
         int mid;
         int max = ((offsetEnd - offsetBegin) / 6) - 1;
         QString s;
 
         // binary search over 6-byte entries: 16 bit code followed by a 32 bit string offset
         while (max >= min) {
             mid = (min + max) / 2;
             const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid * 6);
             if (unicode > midUnicode) {
                 min = mid + 1;
             } else if (unicode < midUnicode) {
                 max = mid - 1;
             } else {
                 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid * 6 + 2);
                 // the byte at offset is the category code (see category()); the name follows it
                 s = QString::fromUtf8(dataFile.constData() + offset + 1);
                 break;
             }
         }
 
         if (s.isNull()) {
             return QCoreApplication::translate("KCharSelectData", "<not assigned>");
         } else {
             return s;
         }
     }
 }
 
 // Returns the index of the first block whose last code is not below c; the
 // last block is returned when no block qualifies. Returns 0 when the data
 // file is unavailable or c cannot be represented in it.
 int KCharSelectData::blockIndex(uint c)
 {
     if (!openDataFile()) {
         return 0;
     }
 
     const uchar *bytes = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 tableBegin = qFromLittleEndian<quint32>(bytes + 20);
     const quint32 tableEnd = qFromLittleEndian<quint32>(bytes + 24);
     const quint16 dbCode = mapCodePointToDataBase(c);
     if (dbCode == 0xFFFF) {
         return 0;
     }
 
     const int lastBlock = ((tableEnd - tableBegin) / 4) - 1;
 
     // Linear scan over the "last code" field of each 4-byte block entry.
     int found = 0;
     for (; found < lastBlock; ++found) {
         const quint16 blockEnd = qFromLittleEndian<quint16>(bytes + tableBegin + found * 4 + 2);
         if (dbCode <= blockEnd) {
             break;
         }
     }
 
     return found;
 }
 
 int KCharSelectData::sectionIndex(int block)
 {
     if (!openDataFile()) {
         return 0;
     }
 
     const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 28);
     const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 32);
 
     int max = ((offsetEnd - offsetBegin) / 4) - 1;
 
     for (int i = 0; i <= max; i++) {
         if (qFromLittleEndian<quint16>(data + offsetBegin + i * 4 + 2) == block) {
             return qFromLittleEndian<quint16>(data + offsetBegin + i * 4);
         }
     }
 
     return 0;
 }
 
 // Returns the translated name of the block with the given index. Returns a
 // null QString when the data file is unavailable or the index lies beyond
 // the last stored block name.
 QString KCharSelectData::blockName(int index)
 {
     if (!openDataFile()) {
         return QString();
     }
 
     const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 stringBegin = qFromLittleEndian<quint32>(udata + 16);
     const quint32 stringEnd = qFromLittleEndian<quint32>(udata + 20);
 
     quint32 i = stringBegin;
     int currIndex = 0;
 
     // The names are stored back to back as NUL-terminated strings; skip
     // over the first index of them.
     const char *data = dataFile.constData();
     while (i < stringEnd && currIndex < index) {
         i += qstrlen(data + i) + 1;
         currIndex++;
     }
 
     if (i >= stringEnd) {
         // Ran off the string table: what follows is not a block name.
         return QString();
     }
 
     return QCoreApplication::translate("KCharSelectData", data + i, "KCharselect unicode block name");
 }
 
 // Returns the translated name of the section with the given index. Returns a
 // null QString when the data file is unavailable or the index lies beyond
 // the last stored section name.
 QString KCharSelectData::sectionName(int index)
 {
     if (!openDataFile()) {
         return QString();
     }
 
     const uchar *udata = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 stringBegin = qFromLittleEndian<quint32>(udata + 24);
     const quint32 stringEnd = qFromLittleEndian<quint32>(udata + 28);
 
     quint32 i = stringBegin;
     int currIndex = 0;
 
     // The names are stored back to back as NUL-terminated strings; skip
     // over the first index of them.
     const char *data = dataFile.constData();
     while (i < stringEnd && currIndex < index) {
         i += qstrlen(data + i) + 1;
         currIndex++;
     }
 
     if (i >= stringEnd) {
         // Ran off the string table: what follows is not a section name.
         return QString();
     }
 
     // The disambiguation must be the one used by sectionList(), isCombining()
     // and the generated translation dummy, otherwise no translation is found.
     return QCoreApplication::translate("KCharSelectData", data + i, "KCharSelect section name");
 }
 
 // Returns the alias names stored in the details record of c; empty when the
 // data file is unavailable or c has no details record.
 QStringList KCharSelectData::aliases(uint c)
 {
     QStringList names;
     if (!openDataFile()) {
         return names;
     }
     const int detailIndex = getDetailIndex(c);
     if (detailIndex == 0) {
         return names;
     }
 
     // Record layout used here: 32 bit string offset at +2, 8 bit count at +6.
     const uchar *record = reinterpret_cast<const uchar *>(dataFile.constData()) + detailIndex;
     const quint8 count = record[6];
     const char *text = dataFile.constData() + qFromLittleEndian<quint32>(record + 2);
 
     // The strings follow each other, each terminated by a NUL byte.
     for (int n = 0; n < count; ++n) {
         names.append(QString::fromUtf8(text));
         text += qstrlen(text) + 1;
     }
     return names;
 }
 
 // Returns the notes stored in the details record of c; empty when the data
 // file is unavailable or c has no details record.
 QStringList KCharSelectData::notes(uint c)
 {
     QStringList entries;
     if (!openDataFile()) {
         return entries;
     }
     const int detailIndex = getDetailIndex(c);
     if (detailIndex == 0) {
         return entries;
     }
 
     // Record layout used here: 32 bit string offset at +7, 8 bit count at +11.
     const uchar *record = reinterpret_cast<const uchar *>(dataFile.constData()) + detailIndex;
     const quint8 count = record[11];
     const char *text = dataFile.constData() + qFromLittleEndian<quint32>(record + 7);
 
     // The strings follow each other, each terminated by a NUL byte.
     for (int n = 0; n < count; ++n) {
         entries.append(QString::fromUtf8(text));
         text += qstrlen(text) + 1;
     }
 
     return entries;
 }
 
 // Returns the code points cross-referenced by the details record of c; empty
 // when the data file is unavailable or c has no details record.
 QVector<uint> KCharSelectData::seeAlso(uint c)
 {
     QVector<uint> related;
     if (!openDataFile()) {
         return related;
     }
     const int detailIndex = getDetailIndex(c);
     if (detailIndex == 0) {
         return related;
     }
 
     // Record layout used here: 32 bit list offset at +22, 8 bit count at +26.
     const uchar *bytes = reinterpret_cast<const uchar *>(dataFile.constData());
     const uchar *record = bytes + detailIndex;
     const quint8 count = record[26];
     const uchar *entry = bytes + qFromLittleEndian<quint32>(record + 22);
 
     // The list holds 16 bit database codes, which are mapped back to code points.
     for (int n = 0; n < count; ++n, entry += 2) {
         related.append(mapDataBaseToCodePoint(qFromLittleEndian<quint16>(entry)));
     }
 
     return related;
 }
 
 // Returns the equivalents stored in the details record of c; empty when the
 // data file is unavailable or c has no details record.
 QStringList KCharSelectData::equivalents(uint c)
 {
     QStringList entries;
     if (!openDataFile()) {
         return entries;
     }
     const int detailIndex = getDetailIndex(c);
     if (detailIndex == 0) {
         return entries;
     }
 
     // Record layout used here: 32 bit string offset at +17, 8 bit count at +21.
     const uchar *record = reinterpret_cast<const uchar *>(dataFile.constData()) + detailIndex;
     const quint8 count = record[21];
     const char *text = dataFile.constData() + qFromLittleEndian<quint32>(record + 17);
 
     // The strings follow each other, each terminated by a NUL byte.
     for (int n = 0; n < count; ++n) {
         entries.append(QString::fromUtf8(text));
         text += qstrlen(text) + 1;
     }
 
     return entries;
 }
 
 // Returns the approximate equivalents stored in the details record of c;
 // empty when the data file is unavailable or c has no details record.
 QStringList KCharSelectData::approximateEquivalents(uint c)
 {
     QStringList entries;
     if (!openDataFile()) {
         return entries;
     }
     const int detailIndex = getDetailIndex(c);
     if (detailIndex == 0) {
         return entries;
     }
 
     // Record layout used here: 32 bit string offset at +12, 8 bit count at +16.
     const uchar *record = reinterpret_cast<const uchar *>(dataFile.constData()) + detailIndex;
     const quint8 count = record[16];
     const char *text = dataFile.constData() + qFromLittleEndian<quint32>(record + 12);
 
     // The strings follow each other, each terminated by a NUL byte.
     for (int n = 0; n < count; ++n) {
         entries.append(QString::fromUtf8(text));
         text += qstrlen(text) + 1;
     }
 
     return entries;
 }
 
 // Decomposes a Hangul syllable into its jamo code points (leading, vowel and,
 // if present, trailing). Returns an empty vector for every other code point;
 // for now nothing but Hangul syllables is decomposed.
 QVector<uint> KCharSelectData::decomposition(uint c) {
     QVector<uint> jamo;
     // unsigned arithmetic: code points below SBase wrap around and fail the test
     const uint syllable = c - SBase;
     if (syllable < SCount) {
         jamo.append(LBase + syllable / NCount);            // Choseong
         jamo.append(VBase + (syllable % NCount) / TCount); // Jungseong
         const uint trailing = syllable % TCount;
         if (trailing != 0) {
             jamo.append(TBase + trailing);                 // Jongsung
         }
     }
     return jamo;
 }
 
 // Returns the seven Unihan strings stored for c, with a null QString for each
 // missing one. Returns an empty list when the data file is unavailable, c is
 // not representable in it, or the Unihan table has no entry for c.
 QStringList KCharSelectData::unihanInfo(uint c)
 {
     if (!openDataFile()) {
         return QStringList();
     }
 
     const quint16 dbCode = mapCodePointToDataBase(c);
     if (dbCode == 0xFFFF) {
         return QStringList();
     }
 
     const char *raw = dataFile.constData();
     const uchar *bytes = reinterpret_cast<const uchar *>(raw);
     // The Unihan table starts at the offset stored at +36 and runs to the end of the file.
     const quint32 tableBegin = qFromLittleEndian<quint32>(bytes + 36);
     const quint32 tableEnd = dataFile.size();
 
     // Binary search over 30-byte records: 16 bit code followed by seven 32 bit offsets.
     int low = 0;
     int high = ((tableEnd - tableBegin) / 30) - 1;
     while (low <= high) {
         const int probe = (low + high) / 2;
         const uchar *record = bytes + tableBegin + probe * 30;
         const quint16 probeCode = qFromLittleEndian<quint16>(record);
         if (dbCode > probeCode) {
             low = probe + 1;
             continue;
         }
         if (dbCode < probeCode) {
             high = probe - 1;
             continue;
         }
 
         QStringList fields;
         for (int field = 0; field < 7; ++field) {
             const quint32 stringOffset = qFromLittleEndian<quint32>(record + 2 + field * 4);
             if (stringOffset == 0) {
                 // offset 0 marks a missing field
                 fields.append(QString());
             } else {
                 fields.append(QString::fromUtf8(raw + stringOffset));
             }
         }
         return fields;
     }
 
     return QStringList();
 }
 
 // Returns the Unicode category of c as stored in the data file. Falls back to
 // QChar::category() when the data file is unavailable, c is not representable
 // in it, or the names table has no entry for c.
 QChar::Category KCharSelectData::category(uint c)
 {
     if (!openDataFile()) {
         return QChar::category(c);
     }
 
     ushort unicode = mapCodePointToDataBase(c);
     if (unicode == 0xFFFF) {
         return QChar::category(c);
     }
 
     const uchar *data = reinterpret_cast<const uchar *>(dataFile.constData());
     const quint32 offsetBegin = qFromLittleEndian<quint32>(data + 4);
     const quint32 offsetEnd = qFromLittleEndian<quint32>(data + 8);
 
     int min = 0;
     int mid;
     int max = ((offsetEnd - offsetBegin) / 6) - 1;
 
     // Binary search over 6-byte entries: 16 bit code followed by a 32 bit string offset.
     while (max >= min) {
         mid = (min + max) / 2;
         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid * 6);
         if (unicode > midUnicode) {
             min = mid + 1;
         } else if (unicode < midUnicode) {
             max = mid - 1;
         } else {
             quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid * 6 + 2);
             // The byte in front of the name is the category code.
             uchar categoryCode = *(data + offset);
             Q_ASSERT(categoryCode > 0);
             categoryCode--;  /* Qt5 changed QChar::Category enum to start from 0 instead of 1
                                 See QtBase commit d17c76feee9eece4 */
             return QChar::Category(categoryCode);
         }
     }
 
     return QChar::category(c);
 }
 
 // Returns true unless c is a control character or an unassigned code point.
 bool KCharSelectData::isPrint(uint c)
 {
     const QChar::Category kind = category(c);
     return kind != QChar::Other_Control && kind != QChar::Other_NotAssigned;
 }
 
 // Returns true when c can safely be shown: it must be printable, must not be
 // a default ignorable code point, and must not be U+FDD0 or U+FDD1.
 bool KCharSelectData::isDisplayable(uint c)
 {
     // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end
     // of frames. Trying to display them leads to a crash caused by a Qt
     // "noBlockInString" assertion, so they count as non-printable.
     const bool frameMarker = (c == 0xFDD0 || c == 0xFDD1);
     if (frameMarker || isIgnorable(c)) {
         return false;
     }
     return isPrint(c);
 }
 
 // Returns true for the hard-coded set of Default Ignorable Code Points.
 bool KCharSelectData::isIgnorable(uint c)
 {
     /*
      * The Unicode standard says that Default Ignorable Code Points should be
      * ignored unless explicitly supported. U+202E RIGHT-TO-LEFT-OVERRIDE,
      * for instance, is printable according to Qt, yet displaying it has the
      * undesired effect of turning all text RTL. There is no way for us to
      * "explicitly" support it, so such code points are treated as
      * non-printable.
      *
      * The list is taken from the Default_Ignorable_Code_Point property in
      * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
      */
 
     //NOTE: hardcoding these here is not very nice; is it worth modifying
     //      the binary data file to hold them?
 
     // single code points
     switch (c) {
     case 0x00AD:
     case 0x034F:
     case 0x115F:
     case 0x1160:
     case 0x17B4:
     case 0x17B5:
     case 0x3164:
     case 0xFEFF:
     case 0xFFA0:
         return true;
     default:
         break;
     }
 
     // ranges
     return (c >= 0x180B && c <= 0x180D)
            || (c >= 0x200B && c <= 0x200F)
            || (c >= 0x202A && c <= 0x202E)
            || (c >= 0x2060 && c <= 0x206F)
            || (c >= 0xFE00 && c <= 0xFE0F)
            || (c >= 0xFFF0 && c <= 0xFFF8);
 }
 
 // Returns true when c lies in the "Combining Diacritics" section.
 bool KCharSelectData::isCombining(uint c)
 {
     //FIXME: this test is imperfect. Many combining characters live outside
     //       of this section. See Grapheme_Extend in
     //       http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
     const QString combiningSection = QCoreApplication::translate("KCharSelectData", "Combining Diacritics", "KCharSelect section name");
     return section(c) == combiningSection;
 }
 
 // Returns rich text that shows c enlarged in the given font, or a bold
 // "Non-printable" marker when c is not displayable.
 QString KCharSelectData::display(uint c, const QFont &font)
 {
     if (!isDisplayable(c)) {
         return QStringLiteral("<b>") + QCoreApplication::translate("KCharSelectData", "Non-printable") + QStringLiteral("</b>");
     }
 
     QString html = QStringLiteral("<font size=\"+4\" face=\"") + font.family() + QStringLiteral("\">");
     if (isCombining(c)) {
         // combining characters are shown together with sample context
         html += displayCombining(c);
     } else {
         // numeric character reference of the code point
         html += QStringLiteral("&#") + QString::number(c) + QLatin1Char(';');
     }
     html += QStringLiteral("</font>");
     return html;
 }
 
 // Returns rich text showing the combining character c twice: once between
 // non-breaking spaces and once inside the sample text "(abc)".
 QString KCharSelectData::displayCombining(uint c)
 {
     /*
      * This makes it easier to see how a combining character affects the
      * text around it. The initial plan was to use U+25CC DOTTED CIRCLE, as
      * seen in pdfs from Unicode, but that seems to cause a lot of alignment
      * problems.
      *
      * Eventually it would be nice to determine whether the character
      * combines to the left or to the right, etc.
      */
     const QString reference = QString::number(c);
     const QString standalone = QStringLiteral("&nbsp;&#") + reference + QStringLiteral(";&nbsp;");
     const QString inContext = QStringLiteral(" (ab&#") + reference + QStringLiteral(";c)");
     return standalone + inContext;
 }
 
 // Returns the translated, human readable description of a Unicode category;
 // categories not listed below are reported as "Unknown".
 QString KCharSelectData::categoryText(QChar::Category category)
 {
     switch (category) {
     case QChar::Other_Control: return QCoreApplication::translate("KCharSelectData", "Other, Control");
     case QChar::Other_Format: return QCoreApplication::translate("KCharSelectData", "Other, Format");
     case QChar::Other_NotAssigned: return QCoreApplication::translate("KCharSelectData", "Other, Not Assigned");
     case QChar::Other_PrivateUse: return QCoreApplication::translate("KCharSelectData", "Other, Private Use");
     case QChar::Other_Surrogate: return QCoreApplication::translate("KCharSelectData", "Other, Surrogate");
     case QChar::Letter_Lowercase: return QCoreApplication::translate("KCharSelectData", "Letter, Lowercase");
     case QChar::Letter_Modifier: return QCoreApplication::translate("KCharSelectData", "Letter, Modifier");
     case QChar::Letter_Other: return QCoreApplication::translate("KCharSelectData", "Letter, Other");
     case QChar::Letter_Titlecase: return QCoreApplication::translate("KCharSelectData", "Letter, Titlecase");
     case QChar::Letter_Uppercase: return QCoreApplication::translate("KCharSelectData", "Letter, Uppercase");
     case QChar::Mark_SpacingCombining: return QCoreApplication::translate("KCharSelectData", "Mark, Spacing Combining");
     case QChar::Mark_Enclosing: return QCoreApplication::translate("KCharSelectData", "Mark, Enclosing");
     case QChar::Mark_NonSpacing: return QCoreApplication::translate("KCharSelectData", "Mark, Non-Spacing");
     case QChar::Number_DecimalDigit: return QCoreApplication::translate("KCharSelectData", "Number, Decimal Digit");
     case QChar::Number_Letter: return QCoreApplication::translate("KCharSelectData", "Number, Letter");
     case QChar::Number_Other: return QCoreApplication::translate("KCharSelectData", "Number, Other");
     case QChar::Punctuation_Connector: return QCoreApplication::translate("KCharSelectData", "Punctuation, Connector");
     case QChar::Punctuation_Dash: return QCoreApplication::translate("KCharSelectData", "Punctuation, Dash");
     case QChar::Punctuation_Close: return QCoreApplication::translate("KCharSelectData", "Punctuation, Close");
     case QChar::Punctuation_FinalQuote: return QCoreApplication::translate("KCharSelectData", "Punctuation, Final Quote");
     case QChar::Punctuation_InitialQuote: return QCoreApplication::translate("KCharSelectData", "Punctuation, Initial Quote");
     case QChar::Punctuation_Other: return QCoreApplication::translate("KCharSelectData", "Punctuation, Other");
     case QChar::Punctuation_Open: return QCoreApplication::translate("KCharSelectData", "Punctuation, Open");
     case QChar::Symbol_Currency: return QCoreApplication::translate("KCharSelectData", "Symbol, Currency");
     case QChar::Symbol_Modifier: return QCoreApplication::translate("KCharSelectData", "Symbol, Modifier");
     case QChar::Symbol_Math: return QCoreApplication::translate("KCharSelectData", "Symbol, Math");
     case QChar::Symbol_Other: return QCoreApplication::translate("KCharSelectData", "Symbol, Other");
     case QChar::Separator_Line: return QCoreApplication::translate("KCharSelectData", "Separator, Line");
     case QChar::Separator_Paragraph: return QCoreApplication::translate("KCharSelectData", "Separator, Paragraph");
     case QChar::Separator_Space: return QCoreApplication::translate("KCharSelectData", "Separator, Space");
     default: return QCoreApplication::translate("KCharSelectData", "Unknown");
     }
 }
 
 /*
  * Searches for characters matching the query string.
  *
  * The query may be a C octal escaped UTF-8 sequence, a single character, a
  * hexadecimal code (optionally prefixed with "u+", "U+", "0x" or "0X"), a
  * decimal code point, or words that are looked up in the search index.
  *
  * Returns the code points named directly by the query first, followed by the
  * sorted index matches that contain every word of the query.
  */
 QVector<uint> KCharSelectData::find(const QString &needle)
 {
     QSet<uint> result;
 
     QVector<uint> returnRes;
     QString simplified = needle.simplified();
     QStringList searchStrings;
 
     QRegularExpression octalExp(QStringLiteral("^\\\\[0-7][0-7\\\\]*$"));
     QRegularExpressionMatch match = octalExp.match(simplified);
     if (match.hasMatch()) {
         // search for C octal escaped UTF-8
         QByteArray utf8;
         int byte = -1;
         const int length = simplified.length();
         // The loop runs one step past the last character so that the final
         // byte is flushed. A NUL stands in for the missing character there,
         // because QString::at() must not be called with an index == length.
         for (int i = 0; i <= length; ++i) {
             const int c = (i < length) ? simplified.at(i).unicode() : 0;
             if (c >= '0' && c <= '7') {
                 byte = 8 * byte + c - '0';
             } else if (byte == -1) {
                 byte = 0;
             } else if (byte >= 0x00 && byte <= 0xFF) {
                 utf8.append((char) byte);
                 byte = 0;
             }
         }
         simplified = QString::fromUtf8(utf8);
     }
 
     if (simplified.length() <= 2) {
         QVector<uint> ucs4 = simplified.toUcs4();
         if (ucs4.size() == 1) {
             // search for hex representation of the character
             searchStrings = QStringList(formatCode(ucs4.at(0)));
         }
     } else {
         searchStrings = splitString(simplified);
     }
 
     if (searchStrings.count() == 0) {
         return returnRes;
     }
 
     QRegularExpression hexExp(QStringLiteral("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4,5})$"));
     foreach (const QString &s, searchStrings) {
         QRegularExpressionMatch match = hexExp.match(s);
         if (match.hasMatch()) {
             returnRes.append(match.captured(2).toInt(nullptr, 16));
             // search for "1234" instead of "0x1234"
             if (s.length() == 6 || s.length() == 7) {
                 searchStrings[searchStrings.indexOf(s)] = match.captured(2);
             }
         }
         // try to parse string as decimal number
         bool ok;
         int unicode = s.toInt(&ok);
         if (ok && unicode >= 0 && unicode <= QChar::LastValidCodePoint) {
             returnRes.append(unicode);
         }
     }
 
     // a character must match every word of the query
     bool firstSubString = true;
     foreach (const QString &s, searchStrings) {
         QSet<uint> partResult = getMatchingChars(s.toLower());
         if (firstSubString) {
             result = partResult;
             firstSubString = false;
         } else {
             result = result.intersect(partResult);
         }
     }
 
     // remove results found by matching the code point to prevent duplicate results
     // while letting these characters stay at the beginning
     foreach (uint c, returnRes) {
         result.remove(c);
     }
 
     QVector<uint> sortedResult;
     sortedResult.reserve(result.count());
     QSet<uint>::const_iterator it = result.begin();
     const QSet<uint>::const_iterator end = result.end();
     for ( ; it != end ; ++it ) {
         sortedResult.append(*it);
     }
     qSort(sortedResult);
 
     returnRes += sortedResult;
     return returnRes;
 }
 
 // Returns the code points of all index entries whose key starts with s.
 // Returns an empty set when no data file has been loaded. Waits for the
 // index creation started by openDataFile() to finish.
 QSet<uint> KCharSelectData::getMatchingChars(const QString &s)
 {
     QSet<uint> matches;
     if (dataFile.isEmpty()) {
         return matches;
     }
 
     futureIndex.waitForFinished();
     const Index index = futureIndex;
 
     // All keys sharing the prefix s are contiguous, starting at the lower bound of s.
     for (Index::const_iterator entry = index.lowerBound(s);
          entry != index.constEnd() && entry.key().startsWith(s);
          ++entry) {
         foreach (quint16 dbCode, entry.value()) {
             matches.insert(mapDataBaseToCodePoint(dbCode));
         }
     }
 
     return matches;
 }
 
 // Splits s into tokens made of letters, digits and '+'; every other
 // character separates tokens and is dropped.
 QStringList KCharSelectData::splitString(const QString &s)
 {
     QStringList tokens;
     const int length = s.length();
     int pos = 0;
     while (pos < length) {
         const bool separator = !(s[pos].isLetterOrNumber() || s[pos] == QLatin1Char('+'));
         if (separator) {
             ++pos;
             continue;
         }
         // pos is at the first character of a token: find where the token ends
         const int tokenStart = pos;
         while (pos < length && (s[pos].isLetterOrNumber() || s[pos] == QLatin1Char('+'))) {
             ++pos;
         }
         tokens.append(s.mid(tokenStart, pos - tokenStart));
     }
     return tokens;
 }
 
 // Registers 'unicode' in 'index' under the lower-cased form of every token
 // that splitString() extracts from 's'.
 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString &s)
 {
     const QStringList words = splitString(s);
     const QStringList::const_iterator last = words.constEnd();
     for (QStringList::const_iterator word = words.constBegin(); word != last; ++word) {
         (*index)[word->toLower()].append(unicode);
     }
 }
 
 // Builds the search index from the raw contents of the data file.
 //
 // For every character the words of its name, aliases, notes, approximate
 // equivalents and equivalents are indexed (see appendToIndex()), as well as
 // the formatted code of each of its "see also" cross references. All values
 // stored in the index are the 16 bit values found in the data file.
 //
 // 'dataFile' is the complete data file; the table offsets are read from its
 // header as little-endian 32 bit values.
 Index KCharSelectData::createIndex(const QByteArray &dataFile)
 {
     Index i;
 
     const char *data = dataFile.constData();
     const uchar *udata = reinterpret_cast<const uchar *>(data);
 
     // Indexes 'count' consecutive NUL-terminated UTF-8 strings that start at
     // byte 'offset' of the data file for the character 'unicode'.
     auto indexStrings = [&](quint16 unicode, quint32 offset, quint8 count) {
         for (int j = 0;  j < count;  j++) {
             appendToIndex(&i, unicode, QString::fromUtf8(data + offset));
             offset += qstrlen(data + offset) + 1;
         }
     };
 
     // character names
     // each table entry is 6 bytes: quint16 character, quint32 offset of the name
     const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata + 4);
     const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata + 8);
 
     int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
 
     for (int pos = 0; pos <= max; pos++) {
         const uchar *entry = udata + nameOffsetBegin + pos * 6;
         const quint16 unicode = qFromLittleEndian<quint16>(entry);
         const quint32 offset = qFromLittleEndian<quint32>(entry + 2);
         // the name text starts one byte after the stored offset
         appendToIndex(&i, unicode, QString::fromUtf8(data + offset + 1));
     }
 
     // details
     // each record is 27 bytes: quint16 character, followed by five
     // (quint32 offset, quint8 count) pairs for aliases, notes, approximate
     // equivalents, equivalents and see also
     const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata + 12);
     const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata + 16);
 
     max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
 
     for (int pos = 0; pos <= max; pos++) {
         const uchar *record = udata + detailsOffsetBegin + pos * 27;
         const quint16 unicode = qFromLittleEndian<quint16>(record);
 
         // aliases
         indexStrings(unicode, qFromLittleEndian<quint32>(record + 2), record[6]);
 
         // notes
         indexStrings(unicode, qFromLittleEndian<quint32>(record + 7), record[11]);
 
         // approximate equivalents
         indexStrings(unicode, qFromLittleEndian<quint32>(record + 12), record[16]);
 
         // equivalents
         indexStrings(unicode, qFromLittleEndian<quint32>(record + 17), record[21]);
 
         // see also - convert to string (hex)
         const quint8 seeAlsoCount = record[26];
         quint32 seeAlsoOffset = qFromLittleEndian<quint32>(record + 22);
 
         for (int j = 0;  j < seeAlsoCount;  j++) {
             const quint16 seeAlso = qFromLittleEndian<quint16>(udata + seeAlsoOffset);
             appendToIndex(&i, unicode, formatCode(seeAlso, 4, QString()));
             // entries are consecutive 16 bit values, so step to the next one
             // (previously the offset was never advanced and the first entry
             // was indexed seeAlsoCount times)
             seeAlsoOffset += sizeof(quint16);
         }
     }
 
     // unihan data
     // temporarily disabled due to the huge amount of data
 //     const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36);
 //     const quint32 unihanOffsetEnd = dataFile.size();
 //     max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1;
 //
 //     for (int pos = 0; pos <= max; pos++) {
 //         const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30);
 //         for(int j = 0; j < 7; j++) {
 //             quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4);
 //             if(offset != 0) {
 //                 appendToIndex(&i, unicode, QString::fromUtf8(data + offset));
 //             }
 //         }
 //     }
 
     return i;
 }