diff --git a/gdb/qt5printers/core.py b/gdb/qt5printers/core.py index 1f7cfb2..5c04402 100644 --- a/gdb/qt5printers/core.py +++ b/gdb/qt5printers/core.py @@ -1,900 +1,900 @@ # Copyright 2014 Alex Merry # # Permission to use, copy, modify, and distribute this software # and its documentation for any purpose and without fee is hereby # granted, provided that the above copyright notice appear in all # copies and that both that the copyright notice and this # permission notice and warranty disclaimer appear in supporting # documentation, and that the name of the author not be used in # advertising or publicity pertaining to distribution of the # software without specific, written prior permission. # # The author disclaims all warranties with regard to this # software, including all implied warranties of merchantability # and fitness. In no event shall the author be liable for any # special, indirect or consequential damages or any damages # whatsoever resulting from loss of use, data or profits, whether # in an action of contract, negligence or other tortious action, # arising out of or in connection with the use or performance of # this software. 
import gdb.printing import itertools from qt5printers import typeinfo try: import urlparse except ImportError: # Python 3 import urllib.parse as urlparse """Qt5Core pretty printer for GDB.""" # NB: no QPair printer: the default should be fine def _format_jd(jd): """Format a Julian Day in YYYY-MM-DD format.""" - # maths from http://www.tondering.dk/claus/cal/julperiod.php + # maths from https://www.tondering.dk/claus/cal/julperiod.php a = jd + 32044 b = (4 * a + 3) // 146097 c = a - ( (146097 * b) // 4 ) d = (4 * c + 3) // 1461 e = c - ( (1461 * d) // 4 ) m = (5 * e + 2) // 153 day = e - ( (153 * m + 2) // 5 ) + 1 month = m + 3 - 12 * ( m // 10 ) year = 100 * b + d - 4800 + ( m // 10 ) return '{:0=4}-{:0=2}-{:0=2}'.format(year, month, day) def _jd_is_valid(jd): """Return whether QDate would consider a given Julian Day valid.""" return jd >= -784350574879 and jd <= 784354017364 def _format_time_ms(msecs): """Format a number of milliseconds since midnight in HH:MM:SS.ssss format.""" secs = msecs // 1000 mins = secs // 60 hours = mins // 60 return '{:0=2}:{:0=2}:{:0=2}.{:0=3}'.format( hours % 24, mins % 60, secs % 60, msecs % 1000) def _ms_is_valid(msecs): """Return whether QTime would consider a ms since midnight valid.""" return msecs >= 0 and msecs <= 86400000 class ArrayIter: """Iterates over a fixed-size array.""" def __init__(self, array, size): self.array = array self.i = -1 self.size = size def __iter__(self): return self def __next__(self): if self.i + 1 >= self.size: raise StopIteration self.i += 1 return ('[%d]' % self.i, self.array[self.i]) def next(self): return self.__next__() class StructReader: """Reads entries from a struct.""" def __init__(self, data): self.data = data.reinterpret_cast(gdb.lookup_type('char').pointer()) self.ptr_t = gdb.lookup_type('void').pointer() def next_aligned_val(self, typ): ptr_val = int(str(self.data.reinterpret_cast(self.ptr_t)), 16) misalignment = ptr_val % self.ptr_t.sizeof if misalignment > 0: self.data += 
self.ptr_t.sizeof - misalignment val = self.data.reinterpret_cast(typ.pointer()) self.data += typ.sizeof return val.referenced_value() def next_val(self, typ): val = self.data.reinterpret_cast(typ.pointer()) self.data += typ.sizeof return val.referenced_value() class QBitArrayPrinter: """Print a Qt5 QBitArray""" class Iter: def __init__(self, data, size): self.data = data self.i = -1 self.size = size def __iter__(self): return self def __next__(self): if self.i + 1 >= self.size: raise StopIteration self.i += 1 if self.data[1 + (self.i >> 3)] & (1 << (self.i&7)): return (str(self.i), 1) else: return (str(self.i), 0) def next(self): return self.__next__() def __init__(self, val): self.val = val def children(self): d = self.val['d']['d'] data = d.reinterpret_cast(gdb.lookup_type('char').pointer()) + d['offset'] size = (int(d['size']) << 3) - int(data[0]) return self.Iter(data, size) def to_string(self): d = self.val['d']['d'] data = d.reinterpret_cast(gdb.lookup_type('char').pointer()) + d['offset'] size = (int(d['size']) << 3) - int(data[0]) if size == 0: return '' return None def display_hint(self): return 'array' class QByteArrayPrinter: """Print a Qt5 QByteArray""" def __init__(self, val): self.val = val def children(self): d = self.val['d'] data = d.reinterpret_cast(gdb.lookup_type('char').pointer()) + d['offset'] return ArrayIter(data, d['size']) def to_string(self): d = self.val['d'] data = d.reinterpret_cast(gdb.lookup_type('char').pointer()) + d['offset'] return data.string('', 'replace', d['size']) def display_hint(self): return 'string' class QCharPrinter: """Print a Qt5 QChar""" def __init__(self, val): self.val = val def to_string(self): ucs = self.val['ucs'] data = ucs.address.reinterpret_cast(gdb.lookup_type('char').pointer()) unicode_str = data.string('utf-16', 'replace', 2) uch = unicode_str[0] if uch == unichr(0x27): return "'\\''" # this actually gives us Python escapes, but they should all be # valid C escapes as well return "'" + 
uch.encode('unicode_escape') + "'" def display_hint(self): # this is not recognized by gdb, hence the manual escaping and quoting # we do above return 'char' class QDatePrinter: """Print a Qt5 QDate""" def __init__(self, val): self.val = val def to_string(self): jd = int(self.val['jd']) if not _jd_is_valid(jd): return '' return _format_jd(jd) def display_hint(self): return 'date' class QDateTimePrinter: """Print a Qt5 QDateTime""" def __init__(self, val): self.val = val _unix_epoch_jd = 2440588 _ms_per_day = 86400000 # status field _validDate = 0x04 _validTime = 0x08 _validDateTime = 0x10 _timeZoneCached = 0x20 # time spec _localTime = 0 _UTC = 1 _offsetFromUTC = 2 _timeZone = 3 def to_string(self): d = self.val['d']['d'] if not d: return '' try: qshareddata_t = gdb.lookup_type('QSharedData') except gdb.error: try: # well, it only has a QAtomicInt in it qshareddata_t = gdb.lookup_type('QAtomicInt') except gdb.error: # let's hope it's the same size as an int qshareddata_t = gdb.lookup_type('int') try: timespec_t = gdb.lookup_type('Qt::TimeSpec') except gdb.error: # probably an int timespec_t = gdb.lookup_type('int') reader = StructReader(d) reader.next_val(qshareddata_t) m_msecs = reader.next_aligned_val(gdb.lookup_type('qint64')) spec = int(reader.next_val(timespec_t)) m_offsetFromUtc = reader.next_val(gdb.lookup_type('int')) m_timeZone = reader.next_val(gdb.lookup_type('QTimeZone')) status = int(reader.next_val(gdb.lookup_type('int'))) if spec == self._timeZone: timeZoneStr = QTimeZonePrinter(m_timeZone).to_string() if timeZoneStr == '': return '' if spec == self._localTime or (spec == self._timeZone and not status & self._timeZoneCached): # Because QDateTime delays timezone calculations as far as # possible, the ValidDateTime flag may not be set even if # it is a valid DateTime. 
if not status & self._validDate or not status & self._validTime: return '' elif not (status & self._validDateTime): return '' # actually fetch: m_msecs = int(m_msecs) jd = self._unix_epoch_jd # UNIX epoch jd += m_msecs // self._ms_per_day msecs = m_msecs % self._ms_per_day if msecs < 0: # need to adjust back to the previous day jd -= 1 msecs += self._ms_per_day result = _format_jd(jd) + ' ' + _format_time_ms(msecs) if spec == self._localTime: result += ' (Local)' elif spec == self._UTC: result += ' (UTC)' elif spec == self._offsetFromUTC: offset = int(m_offsetFromUtc) if offset == 0: diffstr = '' else: hours = abs(offset // 3600) mins = abs((offset % 3600) // 60) secs = abs(offset % 60) sign = '+' if offset > 0 else '-' diffstr = '{:}{:0=2d}:{:0=2d}'.format(sign, hours, mins) if secs > 0: diffstr += ':{:0=2d}'.format(secs) result += ' (UTC{:})'.format(diffstr) elif spec == self._timeZone: result += ' ({:})'.format(timeZoneStr) return result def display_hint(self): return 'datetime' class QHashPrinter: """Print a Qt5 QHash""" class Iter: def __init__(self, d, e): self.buckets_left = d['numBuckets'] self.node_type = e.type # set us up at the end of a "dummy bucket" self.current_bucket = d['buckets'] - 1 self.current_node = None self.i = -1 self.waiting_for_value = False def __iter__(self): return self def __next__(self): if self.waiting_for_value: self.waiting_for_value = False node = self.current_node.reinterpret_cast(self.node_type) return ('value' + str(self.i), node['value']) if self.current_node: self.current_node = self.current_node['next'] # the dummy node that terminates a bucket is distinguishable # by not having its 'next' value set if not self.current_node or not self.current_node['next']: while self.buckets_left: self.current_bucket += 1 self.buckets_left -= 1 self.current_node = self.current_bucket.referenced_value() if self.current_node['next']: break else: raise StopIteration self.i += 1 self.waiting_for_value = True node = 
self.current_node.reinterpret_cast(self.node_type) return ('key' + str(self.i), node['key']) def next(self): return self.__next__() def __init__(self, val): self.val = val def children(self): d = self.val['d'] if d['size'] == 0: return [] return self.Iter(d, self.val['e']) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['d']['size'] == 0: return '' return None def display_hint(self): return 'map' class QLatin1StringPrinter: """Print a Qt5 QLatin1String""" def __init__(self, val): self.val = val def to_string(self): return self.val['m_data'].string('', 'replace', self.val['m_size']) def display_hint(self): return 'string' class QLinkedListPrinter: """Print a Qt5 QLinkedList""" class Iter: def __init__(self, tail, size): self.current = tail self.i = -1 self.size = size def __iter__(self): return self def __next__(self): if self.i + 1 >= self.size: raise StopIteration self.i += 1 self.current = self.current['n'] return (str(self.i), self.current['t']) def next(self): return self.__next__() def __init__(self, val): self.val = val def children(self): size = int(self.val['d']['size']) if size == 0: return [] return self.Iter(self.val['e'], size) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['d']['size'] == 0: return '' return None def display_hint(self): return 'array' class QListPrinter: """Print a Qt5 QList""" class Iter: def __init__(self, array, begin, end, typ): self.array = array self.end = end self.begin = begin self.offset = 0 if typ.name == 'QStringList': self.el_type = gdb.lookup_type('QString') else: self.el_type = typ.template_argument(0) if ((self.el_type.sizeof > gdb.lookup_type('void').pointer().sizeof) or typeinfo.type_is_known_static(self.el_type)): self.is_pointer = True elif (typeinfo.type_is_known_movable(self.el_type) or typeinfo.type_is_known_primitive(self.el_type)): self.is_pointer = False else: raise ValueError("Could not determine 
whether QList stores " + self.el_type.name + " directly or as a pointer: to fix " + "this, add it to one of the variables in the "+ "qt5printers.typeinfo module") self.node_type = gdb.lookup_type(typ.name + '::Node').pointer() def __iter__(self): return self def __next__(self): if self.begin + self.offset >= self.end: raise StopIteration node = self.array[self.begin + self.offset].reinterpret_cast(self.node_type) if self.is_pointer: p = node['v'] else: p = node self.offset += 1 return ((str(self.offset), p.cast(self.el_type))) def next(self): return self.__next__() def __init__(self, val): self.val = val def children(self): d = self.val['d'] begin = int(d['begin']) end = int(d['end']) if begin == end: return [] return self.Iter(d['array'], begin, end, self.val.type.strip_typedefs()) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['d']['begin'] == self.val['d']['end']: return '' return None def display_hint(self): return 'array' class QMapPrinter: """Print a Qt5 QMap""" class Iter: def __init__(self, root, node_p_type): self.root = root self.current = None self.node_p_type = node_p_type self.next_is_key = True self.i = -1 # we store the path here to avoid keeping re-fetching # values from the inferior (also, skips the pointer # arithmetic involved in using the parent pointer) self.path = [] def __iter__(self): return self def moveToNextNode(self): if self.current is None: # find the leftmost node if not self.root['left']: return False self.current = self.root while self.current['left']: self.path.append(self.current) self.current = self.current['left'] elif self.current['right']: self.path.append(self.current) self.current = self.current['right'] while self.current['left']: self.path.append(self.current) self.current = self.current['left'] else: last = self.current self.current = self.path.pop() while self.current['right'] == last: last = self.current self.current = self.path.pop() # if there are no more 
parents, we are at the root if len(self.path) == 0: return False return True def __next__(self): if self.next_is_key: if not self.moveToNextNode(): raise StopIteration self.current_typed = self.current.reinterpret_cast(self.node_p_type) self.next_is_key = False self.i += 1 return ('key' + str(self.i), self.current_typed['key']) else: self.next_is_key = True return ('value' + str(self.i), self.current_typed['value']) def next(self): return self.__next__() def __init__(self, val): self.val = val def children(self): d = self.val['d'] size = int(d['size']) if size == 0: return [] realtype = self.val.type.strip_typedefs() keytype = realtype.template_argument(0) valtype = realtype.template_argument(1) node_type = gdb.lookup_type('QMapData<' + keytype.name + ',' + valtype.name + '>::Node') return self.Iter(d['header'], node_type.pointer()) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['d']['size'] == 0: return '' return None def display_hint(self): return 'map' class QSetPrinter: """Print a Qt5 QSet""" def __init__(self, val): self.val = val def children(self): hashPrinter = QHashPrinter(self.val['q_hash']) # the keys of the hash are the elements of the set, so select # every other item (starting with the first) return itertools.islice(hashPrinter.children(), 0, None, 2) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['q_hash']['d']['size'] == 0: return '' return None def display_hint(self): return 'array' class QStringPrinter: """Print a Qt5 QString""" def __init__(self, val): self.val = val def to_string(self): d = self.val['d'] data = d.reinterpret_cast(gdb.lookup_type('char').pointer()) + d['offset'] data_len = d['size'] * gdb.lookup_type('unsigned short').sizeof return data.string('utf-16', 'replace', data_len) def display_hint(self): return 'string' class QTimePrinter: """Print a Qt5 QTime""" def __init__(self, val): self.val = val def to_string(self): 
msecs = int(self.val['mds']) if not _ms_is_valid(msecs): return '' return _format_time_ms(msecs) def display_hint(self): return 'time' class QTimeZonePrinter: """Print a Qt5 QTimeZone""" def __init__(self, val): self.val = val def to_string(self): d = self.val['d']['d'] if not d: return '' try: # Accessing the private data is error-prone, # so try just calling the id() method. # This should be reasonably safe, as all it will # do is create a QByteArray that references the # same internal data as the stored one. However, # it will only work with an attached process. m_id = gdb.parse_and_eval('((QTimeZone*){:})->id()'.format(self.val.address)) except: ptr_size = gdb.lookup_type('void').pointer().sizeof try: qshareddata_t = gdb.lookup_type('QSharedData') except gdb.error: try: # well, it only has a QAtomicInt in it qshareddata_t = gdb.lookup_type('QAtomicInt') except gdb.error: # let's hope it's the same size as an int qshareddata_t = gdb.lookup_type('int') reader = StructReader(d) reader.next_val(gdb.lookup_type('void').pointer()) # vtable reader.next_val(qshareddata_t) m_id = reader.next_aligned_val(gdb.lookup_type('QByteArray')) return QByteArrayPrinter(m_id).to_string() def display_hint(self): return 'string' class QVariantPrinter: """Print a Qt5 QVariant""" _varmap = { 'char': 'c', 'uchar': 'uc', 'short': 's', 'signed char': 'sc', 'ushort': 'us', 'int': 'i', 'uint': 'u', 'long': 'l', 'ulong': 'ul', 'bool': 'b', 'double': 'd', 'float': 'f', 'qreal': 'real', 'qlonglong': 'll', 'qulonglong': 'ull', 'QObject*': 'o', 'void*': 'ptr' } def __init__(self, val): self.val = val def children(self): d = self.val['d'] typ = int(d['type']) if typ == typeinfo.meta_type_unknown: return [('type', 'invalid')] data = d['data'] if typ in typeinfo.meta_type_names: typename = typeinfo.meta_type_names[typ] if typename in self._varmap: field = self._varmap[typename] return [('type', typename), ('data', data[field])] try: if typename.endswith('*'): gdb_type = 
gdb.lookup_type(typename[0:-1]).pointer() else: gdb_type = gdb.lookup_type(typename) except gdb.error: # couldn't find any type information return [('type', typename), ('data', data)] if gdb_type.sizeof > data.type.sizeof: is_pointer = True elif (typeinfo.type_is_known_movable(gdb_type) or typeinfo.type_is_known_primitive(gdb_type)): is_pointer = False elif gdb_type.tag == 'enum': is_pointer = False else: # couldn't figure out how the type is stored return [('type', typename), ('data', data)] if is_pointer: value = data['shared']['ptr'].reinterpret_cast(gdb_type.pointer()) else: void_star = gdb.lookup_type('void').pointer() data_void = data['c'].address.reinterpret_cast(void_star) value = data_void.reinterpret_cast(gdb_type.pointer()) return [('type', typename), ('data', value.referenced_value())] else: # custom type? return [('type', typ), ('data', data)] def to_string(self): return None class QVarLengthArrayPrinter: """Print a Qt5 QVarLengthArray""" def __init__(self, val): self.val = val def children(self): size = int(self.val['s']) if size == 0: return [] return ArrayIter(self.val['ptr'], size) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['s'] == 0: return '' return None def display_hint(self): return 'array' class QVectorPrinter: """Print a Qt5 QVector""" def __init__(self, val): self.val = val def children(self): d = self.val['d'] el_type = self.val.type.template_argument(0) data_len = int(d['size']) if data_len == 0: return [] data_char = d.reinterpret_cast(gdb.lookup_type('char').pointer()) + d['offset'] data = data_char.reinterpret_cast(el_type.pointer()) return ArrayIter(data, data_len) def to_string(self): # if we return an empty list from children, gdb doesn't print anything if self.val['d']['size'] == 0: return '' return None def display_hint(self): return 'array' class QUrlPrinter: """Print a Qt5 QUrl""" def __init__(self, val): self.val = val def to_string(self): d = self.val['d'] if not d: 
return '' int_t = gdb.lookup_type('int') try: atomicint_t = gdb.lookup_type('QAtomicInt') except gdb.error: # let's hope it's the same size as an int atomicint_t = int_t qstring_t = gdb.lookup_type('QString') uchar_t = gdb.lookup_type('uchar') reader = StructReader(d) # These fields (including order) are unstable, and # may change between even patch-level Qt releases reader.next_val(atomicint_t) port = int(reader.next_val(int_t)) scheme = reader.next_val(qstring_t) userName = reader.next_val(qstring_t) password = reader.next_val(qstring_t) host = reader.next_val(qstring_t) path = reader.next_val(qstring_t) query = reader.next_val(qstring_t) fragment = reader.next_val(qstring_t) reader.next_val(gdb.lookup_type('void').pointer()) sections = int(reader.next_val(uchar_t)) flags = int(reader.next_val(uchar_t)) # isLocalFile and no query and no fragment if flags & 0x01 and not (sections & 0x40) and not (sections & 0x80): # local file return path def qs_to_s(qstring): return QStringPrinter(qstring).to_string() # QUrl::toString() is way more complicated than what we do here, # but this is good enough for debugging result = '' if sections & 0x01: result += qs_to_s(scheme) + ':' if sections & (0x02 | 0x04 | 0x08 | 0x10) or flags & 0x01: result += '//' if sections & 0x02 or sections & 0x04: result += qs_to_s(userName) if sections & 0x04: # this may appear in backtraces that will be sent to other # people result += ':' result += '@' if sections & 0x08: result += qs_to_s(host) if port != -1: result += ':' + str(port) result += qs_to_s(path) if sections & 0x40: result += '?' 
+ qs_to_s(query) if sections & 0x80: result += '#' + qs_to_s(fragment) return result def display_hint(self): return 'string' def build_pretty_printer(): """Builds the pretty printer for Qt5Core.""" pp = gdb.printing.RegexpCollectionPrettyPrinter("Qt5Core") pp.add_printer('QBitArray', '^QBitArray$', QBitArrayPrinter) pp.add_printer('QByteArray', '^QByteArray$', QByteArrayPrinter) pp.add_printer('QChar', '^QChar$', QCharPrinter) pp.add_printer('QDate', '^QDate$', QDatePrinter) pp.add_printer('QDateTime', '^QDateTime$', QDateTimePrinter) pp.add_printer('QLatin1String', '^QLatin1String$', QLatin1StringPrinter) pp.add_printer('QLinkedList', '^QLinkedList<.*>$', QLinkedListPrinter) pp.add_printer('QList', '^QList<.*>$', QListPrinter) pp.add_printer('QMap', '^QMap<.*>$', QMapPrinter) pp.add_printer('QHash', '^QHash<.*>$', QHashPrinter) pp.add_printer('QQueue', '^QQueue<.*>$', QListPrinter) pp.add_printer('QSet', '^QSet<.*>$', QSetPrinter) pp.add_printer('QStack', '^QStack<.*>$', QVectorPrinter) pp.add_printer('QString', '^QString$', QStringPrinter) pp.add_printer('QStringList', '^QStringList$', QListPrinter) pp.add_printer('QTime', '^QTime$', QTimePrinter) pp.add_printer('QTimeZone', '^QTimeZone$', QTimeZonePrinter) pp.add_printer('QVariant', '^QVariant$', QVariantPrinter) pp.add_printer('QVariantList', '^QVariantList$', QListPrinter) pp.add_printer('QVariantMap', '^QVariantMap$', QMapPrinter) pp.add_printer('QVector', '^QVector<.*>$', QVectorPrinter) pp.add_printer('QVarLengthArray', '^QVarLengthArray<.*>$', QVarLengthArrayPrinter) pp.add_printer('QUrl', '^QUrl$', QUrlPrinter) return pp printer = build_pretty_printer() """The pretty printer for Qt5Core. This can be registered using gdb.printing.register_pretty_printer(). """ diff --git a/grantlee_strings_extractor.py b/grantlee_strings_extractor.py index 5fe3179..1f37779 100755 --- a/grantlee_strings_extractor.py +++ b/grantlee_strings_extractor.py @@ -1,402 +1,402 @@ #! 
/usr/bin/env python # -*- coding: utf-8 -*- ## # Copyright 2010,2011 Stephen Kelly # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ## -## Parts of this file are reproduced from the Django framework. The Django licence appears below. +## Parts of this file are reproduced from the Django framework. The Django license appears below. ## # Copyright (c) Django Software Foundation and individual contributors. # All rights reserved. # # Redistribution and use in source and binary forms, with or without modification, # are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. Neither the name of Django nor the names of its contributors may be used # to endorse or promote products derived from this software without # specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ## import os, sys, glob, operator import re import os.path # == Introduction to the template syntax == # # The template syntax looks like this: # (For more see here: http://grantlee.org/apidox/for_themers.html ) # # This is plain text # This is text with a {{ value }} substitution # This is {% if condition_is_met %}a conditional{% endif %} # {# This is a comment #} # This is a {% comment %} multi-line # comment # {% endcomment %} # # That is, we have plain text. # We have value substitution with {{ }} # We have comments with {# #} # We have control tags with {% %} # # The first token inside {% %} syntax is called a tag name. Above, we have # an if tag and a comment tag. # # The 'value' in {{ value }} is called a filter expression. 
In the above case # the filter expression is a simple value which was inserted into the context. # In other cases it can be {{ value|upper }}, that is the value can be passed # through a filter called 'upper' with the '|', or filter expression can # be {{ value|join:"-" }}, that is it can be passed through the join filter # which takes an argument. In this case, the 'value' would actually be a list, # and the join filter would concatenate them with a dash. A filter can have # either no arguments, like upper, or it can take one argument, delimited by # a colon (';'). A filter expression can consist of a value followed by a # chain of filters, such as {{ value|join:"-"|upper }}. A filter expression # can appear one time inside {{ }} but may appear multiple times inside {% %} # For example {% cycle foo|upper bar|join:"-" bat %} contains 3 filter # expressions, 'foo|upper', 'bar|join:"-"' and 'bat'. # # Comments are ignored in the templates. # # == i18n in templates == # # The purpose of this script is to extract translatable strings from templates # The aim is to allow template authors to write templates like this: # # This is a {{ _("translatable string") }} in the template. # This is a {% i18n "translatable string about %1" something %} # This is a {% i18nc "Some context information" "string about %1" something %} # This is a {% i18np "%1 string about %2" numthings something %} # This is a {% i18ncp "some context" "%1 string about %2" numthings something %} # # That is, simple translation with _(), and i18n* tags to allow for variable # substitution, context messages and plurals. Translatable strings may appear # in a filter expression, either as the value begin filtered, or as the argument # or both: # # {{ _("hello")|upper }} # {{ list|join:_("and") }} # # == How the strings are extracted == # # The strings are extracted by parsing the template with regular expressions. 
# The tag_re regular expression breaks the template into a stream of tokens # containing plain text, {{ values }} and {% tags %}. # That work is done by the tokenize method with the create_token method. # Each token is then processed to extract the translatable strings from # the filter expressions. # The original context of much of this script is in the django template system: -# http://code.djangoproject.com/browser/django/trunk/django/template/base.py +# https://github.com/django/django/blob/master/django/template/base.py TOKEN_TEXT = 0 TOKEN_VAR = 1 TOKEN_BLOCK = 2 TOKEN_COMMENT = 3 # template syntax constants FILTER_SEPARATOR = '|' FILTER_ARGUMENT_SEPARATOR = ':' BLOCK_TAG_START = '{%' BLOCK_TAG_END = '%}' VARIABLE_TAG_START = '{{' VARIABLE_TAG_END = '}}' COMMENT_TAG_START = '{#' COMMENT_TAG_END = '#}' # match a variable or block tag and capture the entire tag, including start/end delimiters tag_re = re.compile('(%s.*?%s|%s.*?%s)' % (re.escape(BLOCK_TAG_START), re.escape(BLOCK_TAG_END), re.escape(VARIABLE_TAG_START), re.escape(VARIABLE_TAG_END))) # Expression to match some_token and some_token="with spaces" (and similarly # for single-quoted strings). smart_split_re = re.compile(r""" ((?: [^\s'"]* (?: (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*') [^\s'"]* )+ ) | \S+) """, re.VERBOSE) def smart_split(text): r""" Generator that splits a string by spaces, leaving quoted phrases together. Supports both single and double quotes, and supports escaping quotes with backslashes. In the output, strings will keep their initial and trailing quote marks and escaped quotes will remain escaped (the results can then be further processed with unescape_string_literal()). 
>>> list(smart_split(r'This is "a person\'s" test.')) [u'This', u'is', u'"a person\\\'s"', u'test.'] >>> list(smart_split(r"Another 'person\'s' test.")) [u'Another', u"'person\\'s'", u'test.'] >>> list(smart_split(r'A "\"funky\" style" test.')) [u'A', u'"\\"funky\\" style"', u'test.'] """ for bit in smart_split_re.finditer(text): yield bit.group(0) # This only matches constant *strings* (things in quotes or marked for # translation). constant_string = r"(?:%(strdq)s|%(strsq)s)" % { 'strdq': r'"[^"\\]*(?:\\.[^"\\]*)*"', # double-quoted string 'strsq': r"'[^'\\]*(?:\\.[^'\\]*)*'", # single-quoted string } filter_raw_string = r"""^%(i18n_open)s(?P%(constant_string)s)%(i18n_close)s""" % { 'constant_string': constant_string, 'i18n_open' : re.escape("_("), 'i18n_close' : re.escape(")"), } filter_re = re.compile(filter_raw_string, re.UNICODE|re.VERBOSE) class TemplateSyntaxError(Exception): pass class TranslatableString: _string = '' context = '' plural = '' line_number = -1 def __repr__(self): return "String('%s', '%s', '%s')" % (self._string, self.context, self.plural) class Token(object): def __init__(self, token_type, contents): # token_type must be TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK or TOKEN_COMMENT. self.token_type, self.contents = token_type, contents def __str__(self): return '<%s token: "%s...">' % \ ({TOKEN_TEXT: 'Text', TOKEN_VAR: 'Var', TOKEN_BLOCK: 'Block', TOKEN_COMMENT: 'Comment'}[self.token_type], self.contents[:20].replace('\n', '')) def create_token(token_string, in_tag): """ Convert the given token string into a new Token object and return it. If in_tag is True, we are processing something that matched a tag, otherwise it should be treated as a literal string. 
""" if in_tag: if token_string.startswith(VARIABLE_TAG_START): token = Token(TOKEN_VAR, token_string[len(VARIABLE_TAG_START):-len(VARIABLE_TAG_END)].strip()) elif token_string.startswith(BLOCK_TAG_START): token = Token(TOKEN_BLOCK, token_string[len(BLOCK_TAG_START):-len(BLOCK_TAG_END)].strip()) elif token_string.startswith(COMMENT_TAG_START): token = Token(TOKEN_COMMENT, '') else: token = Token(TOKEN_TEXT, token_string) return token def tokenize(template_string): in_tag = False result = [] for bit in tag_re.split(template_string): if bit: result.append(create_token(bit, in_tag)) in_tag = not in_tag return result class TranslationOutputter: translatable_strings = [] line_number = 0 def get_translatable_filter_args(self, token): """ Find the filter expressions in token and extract the strings in it. """ matches = filter_re.finditer(token) upto = 0 var_obj = False for match in matches: l10nable = match.group("l10nable") if l10nable: # Make sure it's a quoted string if l10nable.startswith('"') and l10nable.endswith('"') \ or l10nable.startswith("'") and l10nable.endswith("'"): ts = TranslatableString() ts._string = l10nable[1:-1] ts.line_number = self.line_number self.translatable_strings.append(ts) def get_contextual_strings(self, token): split = [] _bits = smart_split(token.contents) _bit = next(_bits) if _bit =="i18n" or _bit == "i18n_var": # {% i18n "A one %1, a two %2, a three %3" var1 var2 var3 %} # {% i18n_var "A one %1, a two %2, a three %3" var1 var2 var3 as result %} _bit = next(_bits) if not _bit.startswith("'") and not _bit.startswith('"'): return sentinal = _bit[0] if not _bit.endswith(sentinal): return translatable_string = TranslatableString() translatable_string._string = _bit[1:-1] translatable_string.line_number = self.line_number self.translatable_strings.append(translatable_string) elif _bit =="i18nc" or _bit == "i18nc_var": # {% i18nc "An email send operation failed." "%1 Failed!" var1 %} # {% i18nc_var "An email send operation failed." 
"%1 Failed!" var1 as result %} _bit = next(_bits) if not _bit.startswith("'") and not _bit.startswith('"'): return sentinal = _bit[0] if not _bit.endswith(sentinal): return translatable_string = TranslatableString() translatable_string.context = _bit[1:-1] _bit = next(_bits) translatable_string._string = _bit[1:-1] translatable_string.line_number = self.line_number self.translatable_strings.append(translatable_string) elif _bit =="i18np" or _bit =="i18np_var": # {% i18np "An email send operation failed." "%1 email send operations failed. Error : % 2." count count errorMsg %} # {% i18np_var "An email send operation failed." "%1 email send operations failed. Error : % 2." count count errorMsg as result %} _bit = next(_bits) if not _bit.startswith("'") and not _bit.startswith('"'): return sentinal = _bit[0] if not _bit.endswith(sentinal): return translatable_string = TranslatableString() translatable_string._string = _bit[1:-1] _bit = next(_bits) translatable_string.plural = _bit[1:-1] translatable_string.line_number = self.line_number self.translatable_strings.append(translatable_string) elif _bit =="i18ncp" or _bit =="i18ncp_var": # {% i18np "The user tried to send an email, but that failed." "An email send operation failed." "%1 email send operation failed." count count %} # {% i18np_var "The user tried to send an email, but that failed." "An email send operation failed." "%1 email send operation failed." 
count count as result %} _bit = next(_bits) if not _bit.startswith("'") and not _bit.startswith('"'): return sentinal = _bit[0] if not _bit.endswith(sentinal): return translatable_string = TranslatableString() translatable_string.context = _bit[1:-1] _bit = next(_bits) translatable_string._string = _bit[1:-1] _bit = next(_bits) translatable_string.plural = _bit[1:-1] translatable_string.line_number = self.line_number self.translatable_strings.append(translatable_string) else: return for _bit in _bits: if (_bit == "as"): return self.get_translatable_filter_args(_bit) def get_plain_strings(self, token): split = [] bits = iter(smart_split(token.contents)) for bit in bits: self.get_translatable_filter_args(bit) def translate(self, template_file, outputfile): self.translatable_strings = [] self.line_number = 0 template_string_lines = template_file.readlines() for template_string_line in template_string_lines: self.line_number += 1 for token in tokenize(template_string_line): if token.token_type == TOKEN_VAR or token.token_type == TOKEN_BLOCK: self.get_plain_strings(token) if token.token_type == TOKEN_BLOCK: self.get_contextual_strings(token) self.createOutput(template_file.name, self.translatable_strings, outputfile) def createOutput(self, template_filename, translatable_strings, outputfile): for translatable_string in translatable_strings: outputfile.write("// i18n: file: " + template_filename + ":" + str(translatable_string.line_number) + "\n") if translatable_string.context: if not translatable_string.plural: outputfile.write("pgettext(\"" + translatable_string.context + "\", \"" + translatable_string._string + "\");\n") else: outputfile.write("npgettext(\"" + translatable_string.context + "\", \"" + translatable_string._string + "\", \"" + translatable_string.plural + "\");\n") else: if translatable_string.plural: outputfile.write("ngettext(\"" + translatable_string._string + "\", \"" + translatable_string.plural + "\");\n") else: outputfile.write("gettext(\"" + 
translatable_string._string + "\");\n") if __name__ == "__main__": ex = TranslationOutputter() outputfile = sys.stdout files = sys.argv[1:] for filename in files: f = open(filename, "r") ex.translate(f, outputfile) outputfile.write("\n") diff --git a/kf5/convert-kmimetype.pl b/kf5/convert-kmimetype.pl index cf6a3d2..5a5bf2a 100755 --- a/kf5/convert-kmimetype.pl +++ b/kf5/convert-kmimetype.pl @@ -1,173 +1,173 @@ #!/usr/bin/perl -w # David Faure # KMimeType -> QMimeType # find -iname "*.cpp"|xargs kde-dev-scripts/kf5/convert-kmimetype.pl use strict; use File::Basename; use lib dirname($0); use functionUtilkde; foreach my $file (@ARGV) { my $modified; my %varname = (); my $qmimedatabaseAdded; open(my $FILE, "<", $file) or warn "We can't open file $file:$!\n"; my @l = map { my $orig = $_; - # see http://community.kde.org/Frameworks/Porting_Notes#KDECore_Changes + # see https://community.kde.org/Frameworks/Porting_Notes#KDECore_Changes if (/KMimeType::Ptr\s+(\w+)/) { my $var = $1; $varname{$var} = 1; s/KMimeType::Ptr/QMimeType/g; } if (/(\w+)\->is\s*\(/) { my $var = $1; if (defined $varname{$var}) { s/(\w+)\->is/$var\.inherits/; } } if (/if\s+\(\s*(\w+)\s*\)/) { my $var = $1; if (defined $varname{$var}) { s/if\s+\(\s*$var\s*\)/if \($var\.isValid\(\)\)/; } } if (/if\s*\(\s*(\w+).isNull\s*\(\s*\)/) { my $var = $1; if (defined $varname{$var}) { s/if\s*\(\s*$var.isNull\s*\(\s*\)/if \(!$var\.isValid\(\)/; } } if (/if\s*\(\s*!(\w+).isNull\s*\(\s*\)/) { my $var = $1; if (defined $varname{$var}) { s/if\s*\(\s*!$var.isNull\s*\(\s*\)/if \($var\.isValid\(\)/; } } if (/(\w+)\->isDefault\s*\(/) { my $var = $1; if (defined $varname{$var}) { s/(\w+)\->isDefault/$var\.isDefault/; } } my $regexpMimeTypeForName = qr/ ^(\s*) # (1) Indentation (.*) # (2) before KMimeType::mimeType\s*\( (.*)$ # (3) afterreg /x; # /x Enables extended whitespace mode if (my ($indent, $before, $afterreg) = $_ =~ $regexpMimeTypeForName) { $_ = $indent . 
"QMimeDatabase db;\n"; $afterreg =~ s/,\s*KMimeType::ResolveAliases//; $_ .= $indent . $before . "db.mimeTypeForName(" . $afterreg . "\n"; $qmimedatabaseAdded = 1; } if (/(\w+)\->iconName\s*\(/) { my $var = $1; if (defined $varname{$var}) { s/(\w+)\->iconName/$var\.iconName/; } } if (/(\w+)\->comment\s*\(/) { my $var = $1; if (defined $varname{$var}) { s/(\w+)\->comment/$var\.comment/; } } if (/(\w+)\->patterns\s*\(/) { my $var = $1; if (defined $varname{$var}) { s/(\w+)\->patterns/$var\.globPatterns/; } } s/, KMimeType::DontResolveAlias//; my $regexFindByUrlArgs = qr/ ^(\s*) # (1) Indentation (.*) # (2) before KMimeType::findByUrl\s*\((.*),\s*0\s*,\s*true\s*\) # (3) argument (.*)$ # (3) afterreg /x; # /x Enables extended whitespace mode if (my ($indent, $before, $args, $afterreg) = $_ =~ $regexFindByUrlArgs) { my $addDataBase = ""; if (not defined $qmimedatabaseAdded) { $addDataBase = $indent . "QMimeDatabase db;\n"; } $_ = $addDataBase . $indent . "db.mimeTypeForFile($args.path(), QMimeDatabase::MatchExtension)" . "$afterreg" . "\n"; } if (/KMimeType::findByUrl\s*\(/) { if (not defined $qmimedatabaseAdded) { $_ = "QMimeDatabase db;\n" . $_; } s/KMimeType::findByUrl\s*\(/db.mimeTypeForUrl(/; } #KMimeType::Ptr mime = KMimeType::findByContent( body ); my $regexFindByContent = qr/ ^(\s*) # (1) Indentation (.*)\s+ # (2) before (\w+)\s*=\s* # (3) variable KMimeType::findByContent\s*\( (.*)$ # (4) afterreg /x; # /x Enables extended whitespace mode if (my ($indent, $before, $variable, $afterreg) = $_ =~ $regexFindByContent) { $varname{$variable} = 1; warn "variable $variable before :$before $_\n"; my $addDataBase; if (not defined $qmimedatabaseAdded) { $addDataBase = $indent . "QMimeDatabase db;\n"; } if (defined $addDataBase ) { $_ = $addDataBase . 
$_; } s/KMimeType::findByContent\s*\(/db.mimeTypeForData(/; } s/KMimeType::findByPath\s*\((.*),\s*0\s*,\s*true\s*\)/db.mimeTypeForFile($1, QMimeDatabase::MatchExtension)/; s/KMimeType::findByPath\s*\(/db.mimeTypeForFile(/; s/KMimeType::findByNameAndContent\s*\(/db.mimeTypeForFileNameAndData(/; s/KMimeType::findByFileContent\s*\(\s*(\w+)\s*\)/db.mimeTypeForFile($1, QMimeDatabase::MatchContent)/; s/(\w+)->name() == KMimeType::defaultMimeType/$1.isDefault/; s/allParentMimeTypes/allAncestors/; s/->name/\.name/ if (/\.mimeTypeFor/); s/KMimeType::extractKnownExtension/db.suffixForFileName/; s/KMimeType::allMimeTypes/db.allMimeTypes/; if (/(\w+)->name\s*\(\)/) { my $var = $1; if (defined $varname{$var}) { s/(\w+)\->name/$var\.name/; } } $modified ||= $orig ne $_; $_; } <$FILE>; if ($modified) { open (my $OUT, ">", $file); print $OUT @l; close ($OUT); functionUtilkde::removeIncludeInFile($file, "KMimeType"); functionUtilkde::removeIncludeInFile($file, "kmimetype.h"); functionUtilkde::addIncludeInFile($file, "QMimeDatabase"); functionUtilkde::addIncludeInFile($file, "QMimeType"); } } functionUtilkde::diffFile( "@ARGV" ); diff --git a/kf5/convert-to-cmake-automoc.pl b/kf5/convert-to-cmake-automoc.pl index d7d1899..894b9a0 100755 --- a/kf5/convert-to-cmake-automoc.pl +++ b/kf5/convert-to-cmake-automoc.pl @@ -1,71 +1,71 @@ #!/usr/bin/perl # Kevin Funk (2015) # # Attempts to remove lines such as '#include ".moc"' from cpp files # # Automoc conventions: # - include moc_.cpp <=> Q_OBJECT/Q_GADGET inside header # - include .moc <=> Q_OBJECT/Q_GADGET inside source file # # Additionally, if K_PLUGIN_FACTORY is used, we'll *have* to use '.moc' # # Now, if .moc is included, and the source file does *not* need a moc run, # CMake's automoc still performs a moc run and moc will give warnings, such as: # "foo.cpp:0: Note: No relevant classes found. No output generated." 
# => Remove the include in this case # -# More info: http://www.cmake.org/cmake/help/v3.0/manual/cmake-qt.7.html#automoc +# More info: https://cmake.org/cmake/help/v3.0/manual/cmake-qt.7.html#automoc # # Usual invocation: # find -iname "*.cpp" | xargs kde-dev-scripts/kf5/convert-to-cmake-automoc.pl use strict; use warnings; use File::Basename; use lib dirname($0); use functionUtilkde; foreach my $file (@ARGV) { open(my $FILE, "<", $file) or die "We can't open file $file:$!\n"; my $content = do { local $/; <$FILE> }; my $regexRequiresMoc = qr/(Q_OBJECT|Q_GADGET|K_PLUGIN_FACTORY|EXPORT_KONTACT_PLUGIN|_WITH_JSON|K_EXPORT_PLASMA_)/; my ($filenameWithoutExtension, $dirs,) = fileparse($file, qr/\.[^.]*/); # In some cases, we cannot just get rid off the '#include .moc', # and need to include '#include moc_.cpp' # E.g. in case when the moc-generated file needs information about a class declared inside the .cpp file # Try to detect these cases and include '#include moc_.cpp' instead my $headerFile = $dirs . "$filenameWithoutExtension.h"; my $requiresHeaderMocInclude = (-e $headerFile) ? `grep Q_PRIVATE_SLOT -q $headerFile` : 0; my $sourceMocFilename = "$filenameWithoutExtension.moc"; my $requiresSourceMocInclude = ($content =~ /$regexRequiresMoc/); my $includesSourceMocInclude = ($content =~ /#include \"$sourceMocFilename\"/); # rewrite file and fix moc includes if (!$requiresSourceMocInclude && $includesSourceMocInclude) { open (my $OUT, ">", $file); # TODO: Refactor, then use functionUtilkde::removeIncludeInFile? 
# Cannot use, because it a) only accepts <>-style includes and b) creates redundant newlines for (split /^/, $content) { # strip or replace unwanted includes if (/#include \"$sourceMocFilename\"/) { if ($requiresHeaderMocInclude) { print $OUT "#include \"moc_$filenameWithoutExtension.cpp\"\n"; } next; # remove line } print $OUT $_; } close ($OUT); } } functionUtilkde::diffFile( "@ARGV" ); diff --git a/kf5/resolve_kuit.py b/kf5/resolve_kuit.py index 7684bf8..c5b9d3f 100755 --- a/kf5/resolve_kuit.py +++ b/kf5/resolve_kuit.py @@ -1,1789 +1,1789 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Resolve KUIT markup in i18n strings into plain or rich text, # or switch them to xi18n calls. # # Usage: # resolve_kuit.py [OPTIONS] FILE_OR_DIRECTORY... # # By default, KUIT markup is resolved into plain or rich text. # To switch strings containing any KUIT markup to xi18n calls instead, # use -x option; to switch all strings to xi18n calls, use -X option. # For non-code files (.ui, .rc, etc.) -x behaves same like -X, # since there is no way to specify by string whether it is to be # passed through i18n or xi18n call at runtime. Instead this is specified # on the top level (per file, but normally for all such files in a project), # as described in the "Connecting Calls to Catalogs" section # of the ki18n Programmer's Guide. # # Files are modified in-place. Modified file paths are written to stdout. # If an argument is a directory, files from it are recursivelly collected. # Only files with known extensions are processed (even if file with unknown # extension is given directly in the command line, it will be ignored). # The list of known extensions by resolution type can be listed with # -k option. Option -s RESTYPE:EXT1[,EXT2...] can be used to register # additional extensions (without leading dot, case ignored) for given # resolution type. One extension may have several resolution types. # Files in version control bookkeeping directories are skipped. 
# # In C-like function call files (resolution type 'ccall'), # i18n strings are detected as arguments in calls with # *i18n, *i18nc, *i18np, and *i18ncp function names. # By default detection considers string arguments to be either single or # double quoted, call arguments can be split into several lines, and # strings are concatenated when separated only by whitespace. # Default set of quotes can be replaced by repeating the -q QUOTE option. # # In XML-like markup files (resolution type 'xml'), # i18n strings are detected as element texts, for a certain set of tags. # i18n contexts are detected as attributes to those elements, for a certain # set of attributes. These sets can be expanded using -T TAG1[,TAG2...] # and -A ATTR1[,ATTR2...] options. Case is ignored for both. # Markup inside the element text is expected to be XML-escaped (<, etc.), # i.e. the element text is first unescaped before resolution. # # In PO files (resolution type 'po'), i18n strings are detected # according to PO format. # To process PO files, the Pology library must be ready for use. # In msgstr fields, KUIT markup transformations for given language # are looked up in its kdelibs4.po. The pattern path to kdelibs4.po files, # which contains @lang@ placeholder, is given with -t PATTERN option. # This can be a local path or a HTTP URL (e.g. -# http://websvn.kde.org/*checkout*/trunk/l10n-kde4/@lang@/messages/kdelibs/kdelibs4.po ). +# https://websvn.kde.org/*checkout*/trunk/l10n-kde4/@lang@/messages/kdelibs/kdelibs4.po ). # Language of processed PO file is determined from its Language: header field. # If only PO files of one language are processed and they do not reliably # contain this field, the language can be forced with -l LANG option. # By default both the original and the translation fields are resolved, # which is appropriate when the PO file is being resolved before # it has been merged with new template resulting from the resolved code. 
# If an unresolved PO file has been merged with new template first, # then option -m should be issued to resolve only the translation fields. # In this case, on fuzzy messages, if previous original fields (which are # also resolved) and current original fields match after resolution, # the message is unfuzzied. # # For a given i18n string, the decision of whether to resolve KUIT markup # into plain or Qt rich text is made based on the context marker, # as described in KUIT documentation at -# http://techbase.kde.org/Development/Tutorials/Localization/i18n_Semantics . +# https://techbase.kde.org/Development/Tutorials/Localization/i18n_Semantics . # Target formats can also be manually specified for certain context markers # by repeating the -f option. E.g. -f @info:progress=rich would override # the default resolution into plain text for @info:progress i18n strings. # # NOTE: [INTERNAL] # If tags are added on rich text(see top_tag_res variable), # then resolution must not be run over already resolved files. # Context markers will remain but format modifiers will be removed from them, # which may cause further modification in the second run. # # NOTE: [INTERNAL] # If tags are simply removed (see numid_tag_res variable), # a warning is issued on each removal to do something manually with # its associated argument, e.g. wrap it in QString::number(). # It is probably best to look for tags and handle their arguments # before running the resolution. import locale import optparse import os import re import sys def main (): opars = optparse.OptionParser( usage="%prog FILE_OR_DIRECTORY...", description="Resolve KUIT markup in i18n strings. " "Files are recursively searched for, " "and modified in place. " "C-like i18n calls are looked for in all files, " "except in PO files which are specially treated. 
" "WARNING: Do not run twice over same files.") opars.add_option( "-x", dest="switch_to_xi18n", action="store_const", default=0, const=1, help="Instead of resolving markup, switch i18n calls having " "some markup to xi18n calls.") opars.add_option( "-X", dest="switch_to_xi18n", action="store_const", default=0, const=2, help="Instead of resolving markup, switch all i18n calls " "to xi18n calls.") opars.add_option( "-f", dest="formats", action="append", default=[], metavar="MARKER=FORMAT", help="Set resolution into given target format for " "strings with this context marker. " "Target format can be one of: plain, rich. " "Option can be repeated.") opars.add_option( "-q", dest="quotes", action="append", default=[], metavar="QUOTE", help="Set opening and closing quote for string arguments " "in '%s' resolution type. " "Default is single and double quote. " "Option can be repeated." % "ccall") opars.add_option( "-s", dest="add_restype_exts", action="append", default=[], metavar="RESTYPE:EXT1[,EXT2...]", help="Set additional file name extension for given resolution type. " "Option can be repeated.") opars.add_option( "-T", dest="add_xml_texttags", action="store", default=None, metavar="TAG1[,TAG2...]", help="Set additional tags from which to collect text " "in '%s' resolution type." % "xml") opars.add_option( "-A", dest="add_xml_ctxtattrs", action="store", default=None, metavar="ATTR1[,ATTR2...]", help="Set additional attributes to consider as containing " "context in '%s' resolution type." % "xml") opars.add_option( "-t", dest="kdelibs4_path_pattern", action="store", default=None, metavar="PATH_PATTERN", help="The path pattern to kdelibs4.po files, " "which contains @lang@ placeholder. " "It can be a local path or HTTP URL. 
" "Needed only when processing PO files.") opars.add_option( "-l", dest="kdelibs4_lang", action="store", default=None, metavar="LANG", help="The language code of translated text in processed PO files, " "if it cannot be determined reliably from PO headers. " "When this option is in effect, PO files of exactly " "one language of translation must be proceesed.") opars.add_option( "-w", dest="msgfmt_wrap", action="store_true", default=False, help="Apply Gettext tools wrapping to PO files after resolving them.") opars.add_option( "-m", dest="post_merge", action="store_true", default=False, help="Resolve only translation fields in PO files. " "This is to be used when PO file is being resolved " "after it has been merged with template resulting " "from resolved code.") opars.add_option( "-I", dest="interface_wrap", action="store", default=None, metavar="HEAD_SEP", help="[undocumented]", ) opars.add_option( "-k", dest="list_restypes", action="store_true", default=False, help="List known resolution types and associated file extensions. " "It will include additions by '%s' option." % "-s", ) options, args = opars.parse_args() # Set additional resolution types. for rtextspec in options.add_restype_exts: lst = rtextspec.split(":", 1) if len(lst) != 2: raise StandardError( "Resolution specification '%s' given in command line " "is not valid." % rtextspec) rt, extspec = lst if rt not in _map_restype_ext: raise StandardError( "Unknown resolution type '%s' in resolution specification '%s' " "given in command line." % (rt, rtextspec)) exts = [e.lower() for e in extspec.split(",")] _map_restype_ext[rt][0].update(exts) if options.list_restypes: for rt, (exts, rf, ons) in _map_restype_ext.items(): report("%s: %s" % (rt, " ".join(sorted(exts)))) exit(1) # Update target format by context marker specification. for fmtspec in options.formats: try: cmk, fmt = fmtspec.split("=", 1) except: raise StandardError( "Invalid target format specification '%s' " "given in command line." 
% fmtspec) if fmt not in _known_formats.values(): raise StandardError( "Unknown target format '%s' given in command line." % fmt) _cmarker_to_format[cmk] = fmt # Set KUIT resolving options. if options.kdelibs4_path_pattern: if "@lang@" not in options.kdelibs4_path_pattern: raise StandardError( "Path pattern for kdelibs4.po files given in command line " "does not contain %s placeholder." % "@lang@") _kuit_spec.kdelibs4_path_pattern = options.kdelibs4_path_pattern _kuit_spec.force_lang = options.kdelibs4_lang _kuit_spec.interface_wrap = options.interface_wrap # Set C-call resolving options. _ccall_options.switch_to_xi18n = options.switch_to_xi18n if options.quotes: squotes = list(reversed(sorted(options.quotes))) # longest first _ccall_options.quotes[:] = squotes # Set XML resolving options. _xml_options.switch_to_xi18n = options.switch_to_xi18n if options.add_xml_texttags: tags = options.add_xml_texttags.split(",") _xml_options.text_tags.update(tags) if options.add_xml_ctxtattrs: attrs = options.add_xml_ctxtattrs.split(",") _xml_options.ctxt_attrs[:0] = attrs # higher priority # Set PO resolving options. _po_options.switch_to_xi18n = options.switch_to_xi18n _po_options.msgfmt_wrap = options.msgfmt_wrap _po_options.post_merge = options.post_merge # Collect all files. file_paths = [] for path in args: if os.path.isdir(path): for root, dirns, filens in os.walk(path): for filen in filens: file_paths.append(os.path.join(root, filen)) elif os.path.isfile(path): file_paths.append(path) else: raise StandardError( "Command line argument '%s' is neither a file " "nor a directory." % path) # Filter out VCS bookkeeping. mod_file_paths = [] vcs_dirns = set(["CVS", ".svn", ".git"]) for fp in file_paths: els = set(fp.split(os.path.sep)) if not els.intersection(vcs_dirns): mod_file_paths.append(fp) file_paths = mod_file_paths # Resolve files. 
file_paths.sort() test_encs = ["utf8", "iso8859-1", "iso8859-15", "cp1252"] for fp in file_paths: rspecs = get_resolvers_for_file(fp) modified = False for restype, resolvef, onstring in rspecs: if onstring: fstr = open(fp, "rb").read() badpos = -1 for fenc in test_encs: try: fstr = fstr.decode(fenc) except UnicodeDecodeError, e: if badpos < 0: badpos = e.start else: badpos = -1 break if badpos < 0: res_fstr = resolvef(fstr, fp) if res_fstr != fstr: tmpfp = fp + "~tmp" fh = open(tmpfp, "wb") fh.write(res_fstr.encode("utf8")) fh.close() os.rename(tmpfp, fp) modified = True else: warning("%s: Cannot decode file using any of " "test encodings (UTF-8 try produces problem " "in line %d, column %d), skipping it." % (fp, lno_to(fstr, badpos), cno_to(fstr, badpos))) else: if resolvef(fp): modified = True if modified: report(fp) def report (msg): lenc = locale.getpreferredencoding() emsg = ("%s\n" % msg).encode(lenc) sys.stdout.write(emsg) def warning (msg): lenc = locale.getpreferredencoding() emsg = ("[warning] %s\n" % msg).encode(lenc) sys.stderr.write(emsg) class Data: pass _kuit_spec = Data() _kuit_spec.kdelibs4_path_pattern = None _kuit_spec.force_lang = None _kuit_spec.interface_wrap = None _kuit_spec.langdata = {} _space_in_place_tag_rx = re.compile(r"(<[^>]*\S)(/\s*>)", re.U | re.S) def get_language_data (lang): langdata = _kuit_spec.langdata.get(lang) if langdata: return langdata kl4cat = None if lang != "en_US": # Fetch kdelibs4.po for this catalog's language. 
if not _kuit_spec.kdelibs4_path_pattern: raise StandardError( "Path pattern for kdelibs4.po not set (-t option).") kl4path = _kuit_spec.kdelibs4_path_pattern.replace("@lang@", lang) from urllib import urlopen kl4fh = urlopen(kl4path) from pology.catalog import Catalog kl4cat = Catalog("kdelibs4.po", readfh=kl4fh) langdata = Data() langdata.transform = {} for spec in _kuit_transforms.items(): ktrkey, (msgctxt, msgid, subsmap, prepend, postpend, textmodf) = spec pattern = msgid if kl4cat is not None: msgs = kl4cat.select_by_key(msgctxt, msgid) if msgs and msgs[0].translated: pattern = msgs[0].msgstr[0] fmt = ktrkey[2] if fmt == "rich": # Add space before /> in in-place closed rich-text tags, # as Qt may fail to guess format as rich-text otherwise. pattern = _space_in_place_tag_rx.sub(r"\1 \2", pattern) tr = Data() tr.pattern = pattern tr.subsmap = subsmap tr.prepend = prepend tr.postpend = postpend tr.textmodf = textmodf langdata.transform[ktrkey] = tr langdata.shcdelim = {} for spec in _kuit_shortcut_delimiters.items(): fmt, (msgctxt, msgid) = spec delim = msgid if kl4cat is not None: msgs = kl4cat.select_by_key(msgctxt, msgid) if msgs and msgs[0].translated: delim = msgs[0].msgstr[0] langdata.shcdelim[fmt] = delim langdata.keyname = {} for spec in _kuit_key_names: msgctxt, msgid = spec keyname = msgid if kl4cat is not None: msgs = kl4cat.select_by_key(msgctxt, msgid) if msgs and msgs[0].translated: keyname = msgs[0].msgstr[0] langdata.keyname[msgid] = keyname langdata.guidelim = {} for spec in _kuit_guipath_delimiters.items(): fmt, (msgctxt, msgid) = spec delim = msgid if kl4cat is not None: msgs = kl4cat.select_by_key(msgctxt, msgid) if msgs and msgs[0].translated: delim = msgs[0].msgstr[0] langdata.guidelim[fmt] = delim langdata.ifacewrap = None if _kuit_spec.interface_wrap: langdata.ifacewrap = _kuit_spec.interface_wrap _kuit_spec.langdata[lang] = langdata return langdata def lno_to (fstr, p): lno = fstr.count("\n", 0, p) + 1 return lno def cno_to (fstr, p): pb 
= fstr.rfind("\n", 0, p) # If no \n found, -1 is exactly what's needed below. cno = p - pb return cno _ccall_options = Data() # Call specification. _ccall_options.calls = { # "callname": (ctxt_pos, text_pos, plural_pos) "i18n": (-1, 0, -1), "i18nc": (0, 1, -1), "i18np": (-1, 0, 1), "i18ncp": (0, 1, 2), "ki18n": (-1, 0, -1), "ki18nc": (0, 1, -1), "ki18np": (-1, 0, 1), "ki18ncp": (0, 1, 2), "I18N_NOOP": (-1, 0, -1), "I18N_NOOP2": (0, 1, -1), "I18N_NOOP2_NOSTRIP": (0, 1, -1), } # Equip with total number of strings. _ccall_options.calls = dict([(cn, inds + (len([i for i in inds if i >= 0]),)) for cn, inds in _ccall_options.calls.items()]) # Default string quotes (must be sorted from longest to shortest). _ccall_options.quotes = list(reversed(sorted([ "\"", "'", ]))) # To-EOL and delimited comments which may mingle with # concatenated string literals. _ccall_options.midcstr_eolcmnts = set([ "//", "#", ]) _ccall_options.midcstr_delimcmnts = set([ ("/*", "*/"), ]) _ccall_head_rx = re.compile(r"([\w\d_]+)\s*\(", re.U | re.S) _mask_chr = "\x04" _print_mask_chr = u"¬" def resolve_ccall (fstr, path): showparse = False if showparse: report("%s: >>>>> start >>>>>" % path) langdata = get_language_data("en_US") toxi18n = _ccall_options.switch_to_xi18n segs = [] p1 = 0 while True: m = _ccall_head_rx.search(fstr, p1) if not m: segs.append(fstr[p1:]) break p2, p3 = m.span() callname = m.group(1) callspec = _ccall_options.calls.get(callname) if callspec: ictxt, itext, iplural, total = callspec p1a = p3 argspecs = [] all_strings = True end_call = False for k in range(total): if showparse: report("%s:%d: iarg=%d spos=%d" % (path, lno_to(fstr, p1a), k, p1a)) ret = _parse_cstr(fstr, p1a, (",", ")"), path, _ccall_options.midcstr_eolcmnts, _ccall_options.midcstr_delimcmnts) if not ret: all_strings = False break p2a, msarg, quote, outs = ret argspecs.append((msarg, quote, outs)) p1a = p2a if outs[-1].endswith(")"): end_call = True break if len(argspecs) == total: if showparse: 
report("%s:%d: call=[%s]%s" % (path, lno_to(fstr, p3), callname, "".join("{%s||%s}" % (_ppmasked(s[0]), s[1]) for s in argspecs))) csegs = [] lno = lno_to(fstr, p3) mctxt = argspecs[ictxt][0] if ictxt >= 0 else None res_callname = None for iarg, (msarg, quote, outs) in enumerate(argspecs): if iarg != ictxt: ret = resolve_kuit(mctxt, msarg, quote, langdata, path, lno, toxi18n=toxi18n) res_mctxt, res_msarg, xi18n = ret[:3] if xi18n and not res_callname: if callname.startswith("i"): res_callname = "x" + callname elif callname.startswith("k"): res_callname = "kx" + callname[1:] res_sarg = _unmask(res_msarg, outs) csegs.append(res_sarg) else: csegs.append("") if not res_callname: res_callname = callname if ictxt >= 0: outs_ctxt = argspecs[ictxt][2] res_ctxt = _unmask(res_mctxt, outs_ctxt) csegs[ictxt] = res_ctxt if showparse: report("%s:%d: res-segs=%s" % (path, lno_to(fstr, p3), "".join("{%s}" % s for s in csegs))) segs.append(fstr[p1:p2]) segs.append(res_callname) segs.append(fstr[p2 + len(callname):p3]) segs.append("".join(csegs)) p3 = p1a elif all_strings and end_call: if showparse: report("%s:%d: bad-call" % (path, lno_to(fstr, p3))) warning("%s:%d: Too little string arguments to call " "(expected %d, got %d)." 
% (path, lno_to(fstr, p3), total, len(argspecs))) segs.append(fstr[p1:p3]) p3 = p1a else: if showparse: report("%s:%d: not-literal-call" % (path, lno_to(fstr, p3))) segs.append(fstr[p1:p3]) else: segs.append(fstr[p1:p3]) p1 = p3 res_fstr = "".join(segs) if showparse: report("%s: <<<<< end <<<<<" % path) return res_fstr def _ppmasked (s): return s.replace(_mask_chr, _print_mask_chr) def _unmask (ms, outs): segs = [] p1 = 0 io = 0 while True: p2 = ms.find(_mask_chr, p1) if p2 < 0: segs.append(ms[p1:]) break segs.append(ms[p1:p2]) segs.append(outs[io]) io += 1 p1 = p2 + len(_mask_chr) s = "".join(segs) return s def _parse_cstr (fstr, spos, ends, path=None, eolcmnts=[], delimcmnts=[]): showparse = False l = len(fstr) p = spos if showparse: report("parse-cstr-start %d" % p) segs = [] outs = [] quote = None while True: pp = p while p < l and fstr[p].isspace(): p += 1 segs.append(_mask_chr) outs.append(fstr[pp:p]) if p == l: break at_quote = False if quote is None: for q in _ccall_options.quotes: if fstr[p:p + len(q)] == q: at_quote = True quote = q lq = len(quote) break else: if fstr[p:p + lq] == quote: at_quote = True if at_quote: pp = p p += lq p = find_esc(fstr, quote, "\\", p) if p < 0: if path: warning("%s:%d: Unterminated string literal." 
% (path, lno_to(fstr, pp))) return None p += lq segs.append(fstr[pp:p]) if showparse: report("parse-cstr-quote-end %d" % p) continue at_end = False for end in ends: if fstr[p:p + len(end)] == end: pp = p p += len(end) at_end = True segs.append(_mask_chr) outs.append(fstr[pp:p]) if showparse: report("parse-cstr-end-end %d" % p) break if at_end: break cmnt_end = False for ec in eolcmnts: if fstr[p:p + len(ec)] == ec: pp = p p += len(ec) while p < l and fstr[p] != "\n": p += 1 if p < l: p += 1 cmnt_end = True segs.append(_mask_chr) outs.append(fstr[pp:p]) if showparse: report("parse-cstr-eol-cmnt-end %d" % p) break if cmnt_end: continue for dc1, dc2 in delimcmnts: if fstr[p:p + len(dc1)] == dc1: pp = p p += len(dc1) while p < l and fstr[p:p + len(dc2)] != dc2: p += 1 if p == l: warning("%s:%d: Unterminated comment." % (path, lno_to(fstr, pp))) return None p += len(dc2) cmnt_end = True segs.append(_mask_chr) outs.append(fstr[pp:p]) if showparse: report("parse-cstr-delim-cmnt-end %d" % p) break if cmnt_end: continue break if quote is None: return None mstr = "".join(segs) return p, mstr, quote, outs _xml_options = Data() # Default tags and attributes to extract from. # Ordering of attributes is significant, first found is taken as context. # According to extractrc from kdesdk/scripts/. 
# Tags whose element text is treated as an i18n string, and attributes
# (in priority order, first found wins) treated as its context.
# According to extractrc from kdesdk/scripts/.
_xml_options.text_tags = set([
    "text", "title", "string", "whatsthis", "tooltip", "label",
])
_xml_options.ctxt_attrs = [
    "context", "comment",
]

# Lazily-built regexes shared by all resolve_xml() calls.
_xml_rx = Data()
_xml_rx.inited = False


def _init_xml_regexes ():
    """Compile the i18n-element and context-attribute regexes once,
    after any command-line additions to tags/attributes have been made."""
    if _xml_rx.inited:
        return
    tagins = "|".join(sorted(_xml_options.text_tags))
    # Matches <tag attrs>text</tag> for any known text tag; the closing
    # tag must be re-emitted when the element is rebuilt below.
    rx = re.compile(r"<\s*(%s)\b([^>]*)>([^<]*)<\s*/\s*\1\s*>" % tagins,
                    re.U | re.S | re.I)
    _xml_rx.i18n_el = rx
    attrins = "|".join(_xml_options.ctxt_attrs)
    rx = re.compile(r"""^(.*\b(?:%s)\s*=\s*['"])(.*?)(['"].*)$""" % attrins,
                    re.U | re.S | re.I)
    _xml_rx.ctxt_attr = rx
    _xml_rx.inited = True


def resolve_xml (fstr, path):
    """Resolve KUIT markup in i18n-carrying XML elements of fstr
    (contents of the file at path, used for diagnostics) and return
    the modified string."""
    showparse = False
    if showparse:
        report("%s: >>>>> start >>>>>" % path)
    _init_xml_regexes()
    langdata = get_language_data("en_US")
    toxi18n = _xml_options.switch_to_xi18n
    segs = []
    p1 = 0
    while True:
        m = _xml_rx.i18n_el.search(fstr, p1)
        if not m:
            segs.append(fstr[p1:])
            break
        p2, p3 = m.span()
        lno = lno_to(fstr, p2)
        segs.append(fstr[p1:p2])
        tag, attr_str, etext = m.groups()
        # Pull the context out of the attribute string, if present.
        ctxt = None
        m = _xml_rx.ctxt_attr.search(attr_str)
        if m:
            attr_head, ectxt, attr_tail = m.groups()
            ctxt, noesc_ctxt = unescape_xml(ectxt, testnoesc=True)
        text, noesc_text = unescape_xml(etext, testnoesc=True)
        if showparse:
            if ctxt is not None:
                report("%s:%d: ctxt-text={%s}{%s}" % (path, lno, ectxt, etext))
            else:
                report("%s:%d: text={%s}" % (path, lno, etext))
        ret = resolve_kuit(ctxt, text, None, langdata, path, lno,
                           toxi18n=toxi18n)
        res_ctxt, res_text = ret[:2]
        res_etext = escape_xml(res_text, noesc=noesc_text)
        # FIX: the format strings were missing the closing "</%s>", so the
        # surplus `tag` argument raised TypeError and the rebuilt element
        # was left unterminated; the element regex above consumed a full
        # <tag>...</tag> span, so the closing tag must be re-emitted.
        if ctxt is not None:
            res_ectxt = escape_xml(res_ctxt, noesc=noesc_ctxt)
            seg = ("<%s%s%s%s>%s</%s>"
                   % (tag, attr_head, res_ectxt, attr_tail, res_etext, tag))
        else:
            seg = "<%s%s>%s</%s>" % (tag, attr_str, res_etext, tag)
        if showparse:
            if ctxt is not None:
                report("%s:%d: res-ctxt-text={%s}{%s}"
                       % (path, lno, res_ectxt, res_etext))
            else:
                report("%s:%d: res-text={%s}" % (path, lno, res_etext))
        segs.append(seg)
        p1 = p3
    res_fstr = "".join(segs)
    if showparse:
        report("%s: <<<<< end <<<<<" % path)
    return res_fstr
# Options for PO resolution; msgfmt_wrap rewraps output via msgfilter.
# NOTE(review): switch_to_xi18n and post_merge are read below but not
# initialized here — presumably set by option processing elsewhere; confirm.
_po_options = Data()
_po_options.msgfmt_wrap = False


def resolve_po (path):
    """Resolve KUIT markup in all fields of a PO file, in place.

    path: path of the PO file to process
    Returns True if the catalog was modified (and synced to disk).
    Raises StandardError if the PO language cannot be determined.
    """
    # Imported here to avoid hard dependency when PO resolution is unused.
    from pology.catalog import Catalog
    from pology.gtxtools import msgfilter
    cat = Catalog(path)
    langdata_src = get_language_data("en_US")
    # Forced language (if given) wins over the catalog's own header.
    lang = _kuit_spec.force_lang or cat.language()
    if not lang:
        raise StandardError(
            "%s: Cannot determine language of PO file." % path)
    langdata_trn = get_language_data(lang)
    toxi18n_global = _po_options.switch_to_xi18n
    seen_keys = set()
    for ind, msg in enumerate(cat):
        toxi18n = toxi18n_global
        # Override resolution setting by message xi18n flag.
        if "kde-kuit-format" in msg.flag:
            toxi18n = 2
        # Original fields.
        ctxt = msg.msgctxt
        forcerich = False
        if not _po_options.post_merge:
            ret = resolve_kuit(ctxt, msg.msgid, None, langdata_src,
                               path, msg.refline, toxi18n=toxi18n)
            msg.msgid = ret[1]
            if ctxt is not None:
                msg.msgctxt = ret[0]
            if msg.msgid_plural is not None:
                ret = resolve_kuit(ctxt, msg.msgid_plural, None, langdata_src,
                                   path, msg.refline, toxi18n=toxi18n)
                msg.msgid_plural = ret[1]
        else:
            # Check if to not touch existing KUIT or
            # to force rich text in non-original fields.
            # (Originals are left untouched in post-merge mode; they are
            # only probed here to decide how to treat the other fields.)
            if not forcerich:
                ret = resolve_kuit(ctxt, msg.msgid, None, langdata_src,
                                   path, msg.refline, toxi18n=toxi18n)
                has_any_html_tag, has_any_kuit_tag = ret[3:5]
                if has_any_kuit_tag:
                    toxi18n = 2
                else:
                    forcerich = has_any_html_tag
            if not forcerich:
                ret = resolve_entities(msg.msgid, path, msg.refline)
                any_entity_resolved = ret[1]
                forcerich = any_entity_resolved
        # Previous original fields.
        ctxt_prev = msg.msgctxt_previous
        has_previous = False
        if msg.msgid_previous is not None:
            has_previous = True
            ret = resolve_kuit(ctxt_prev, msg.msgid_previous, None,
                               langdata_src, path, msg.refline,
                               toxi18n=toxi18n, forcerich=forcerich)
            msg.msgid_previous = ret[1]
            if ctxt_prev is not None:
                msg.msgctxt_previous = ret[0]
            if msg.msgid_plural_previous is not None:
                ret = resolve_kuit(ctxt_prev, msg.msgid_plural_previous, None,
                                   langdata_src, path, msg.refline,
                                   toxi18n=toxi18n, forcerich=forcerich)
                msg.msgid_plural_previous = ret[1]
        # Translation fields.
        # For fuzzy messages with previous fields, the previous context is
        # the one the translation was made against, so use it instead.
        ctxt_trn = ctxt if (not msg.fuzzy or not has_previous) else ctxt_prev
        for i in range(len(msg.msgstr)):
            ret = resolve_kuit(ctxt_trn, msg.msgstr[i], None, langdata_trn,
                               path, msg.refline,
                               toxi18n=toxi18n, forcerich=forcerich)
            msg.msgstr[i] = ret[1]
            # Keep trailing-newline presence in sync with the msgid,
            # as msgfmt requires it for translated messages.
            if msg.translated:
                if msg.msgid.endswith("\n") and not msg.msgstr[i].endswith("\n"):
                    msg.msgstr[i] += "\n"
                elif not msg.msgid.endswith("\n") and msg.msgstr[i].endswith("\n"):
                    msg.msgstr[i] = msg.msgstr[i][:-1]
        # In post-merge mode, maybe it can be unfuzzied now.
        if _po_options.post_merge and msg.fuzzy and all(list(msg.msgstr)):
            if (    msg.msgctxt == msg.msgctxt_previous
                and msg.msgid == msg.msgid_previous
                and msg.msgid_plural == msg.msgid_plural_previous
            ):
                msg.unfuzzy()
        # Conversion may make a message with same key as a previous one,
        # remove the current message in that case.
        if msg.key in seen_keys:
            cat.remove_on_sync(ind)
        else:
            seen_keys.add(msg.key)
    modified = cat.sync()
    if modified and _po_options.msgfmt_wrap:
        # Rewrap the file exactly as msgfmt would, via a no-op filter.
        msgfilter(["cat"])(cat.filename)
    return modified


# Resolution types mapped to (file extensions, resolver, on-sources flag).
_map_restype_ext = {
    "ccall": (set([
        "cpp", "cxx", "cc", "c",
        "h", "hpp", "hxx", "hh",
        "py", "js", "rb", "qml",
        #"kcfg", won't work due to XML escaping; but there is
        # no existing case of embedded i18n() with KUIT in KDE repos.
    ]), resolve_ccall, True),
    "xml": (set([
        "ui", "rc", "kcfg",
    ]), resolve_xml, True),
    "po": (set([
        "po", "pot",
    ]), resolve_po, False),
}

# Inverted resolution types by extension.
_map_ext_restype = {} def _init_map_ext_restype (): if _map_ext_restype: return for rt, (exts, rf, ons) in _map_restype_ext.items(): for ext in exts: if ext not in _map_ext_restype: _map_ext_restype[ext] = [] _map_ext_restype[ext].append((rt, rf, ons)) def get_resolvers_for_file (path): _init_map_ext_restype() p = path.rfind(".") if p >= 0: ext = path[p + 1:] else: ext = "" rspecs = _map_ext_restype.get(ext, []) return rspecs # KUIT keyboard shortcut delimiters and lookup key in PO files, as # format: (msgctxt, msgid). # According to kuitsemantics.cpp from kdecore. _kuit_raw_shortcut_delimiter_rx = re.compile(r"\+|-", re.U) _kuit_shortcut_delimiters = { "plain": (u"shortcut-key-delimiter/plain", u"+"), "rich": (u"shortcut-key-delimiter/rich", u"+"), } # Add delimiters for term format, same as plain. _kuit_shortcut_delimiters["term"] = _kuit_shortcut_delimiters["plain"] # KUIT keyboard key names and lookup in PO files, # as set((msgctxt, msgid)). F%1 is special. _kuit_key_names_raw = set([ u"Alt", u"AltGr", u"Backspace", u"CapsLock", u"Control", u"Ctrl", u"Del", u"Delete", u"Down", u"End", u"Enter", u"Esc", u"Escape", u"Home", u"Hyper", u"Ins", u"Insert", u"Left", u"Menu", u"Meta", u"NumLock", u"PageDown", u"PageUp", u"PgDown", u"PgUp", u"PauseBreak", u"PrintScreen", u"PrtScr", u"Return", u"Right", u"ScrollLock", u"Shift", u"Space", u"Super", u"SysReq", u"Tab", u"Up", u"Win", u"F%1", ]) _kuit_key_names = set((u"keyboard-key-name", kn) for kn in _kuit_key_names_raw) def textmod_shortcut (text, quote, fmt, langdata): segs = [] p1 = 0 while True: m = _kuit_raw_shortcut_delimiter_rx.search(text, p1) if not m: keyname = text[p1:].strip() else: p2, p3 = m.span() keyname = text[p1:p2].strip() if keyname[:1] == "F" and keyname[1:].isdigit(): lkeypattern = langdata.keyname.get(u"F%1", u"F%1") lkeyname = lkeypattern.replace("%1", keyname[1:]) else: lkeyname = langdata.keyname.get(keyname, keyname) segs.append(lkeyname) if not m: break segs.append(langdata.shcdelim[fmt]) p1 = 
p3 res_text = "".join(segs) if quote: res_text = escape_c(res_text, quote) return res_text # KUIT UI path delimiters and lookup key in PO files, as # format: (msgctxt, msgid). # According to kuitsemantics.cpp from kdecore. _kuit_raw_guipath_delimiter_rx = re.compile(r"->", re.U) _kuit_guipath_delimiters = { "plain": (u"gui-path-delimiter/plain", u"→"), "rich": (u"gui-path-delimiter/rich", u"→"), } # Add delimiters for term format, same as plain. _kuit_guipath_delimiters["term"] = _kuit_guipath_delimiters["plain"] def textmod_interface (text, quote, fmt, langdata): segs = [] p1 = 0 while True: m = _kuit_raw_guipath_delimiter_rx.search(text, p1) if not m: pathel = text[p1:].strip() else: p2, p3 = m.span() pathel = text[p1:p2].strip() if langdata.ifacewrap: head, sep = langdata.ifacewrap[:-1], langdata.ifacewrap[-1:] pathel = "%s%s%s" % (head, pathel, sep) segs.append(pathel) if not m: break segs.append(langdata.guidelim[fmt]) p1 = p3 res_text = "".join(segs) if quote: res_text = escape_c(res_text, quote) return res_text # KUIT transformation patterns and lookup key in PO files, as # (tag, attributes, format): (msgctxt, msgid, subsmap, prepend, postpend, textmodf). # According to kuitsemantics.cpp from kdecore. _kuit_transforms = { (u"title", frozenset([]), "plain"): (u"@title/plain", u"== %1 ==", {"%1": "title"}, "", "\n", None), (u"title", frozenset([]), "rich"): (u"@title/rich", u"

%1

", {"%1": "title"}, "", "", None), (u"subtitle", frozenset([]), "plain"): (u"@subtitle/plain", u"~ %1 ~", {"%1": "subtitle"}, "", "\n", None), (u"subtitle", frozenset([]), "rich"): (u"@subtitle/rich", u"

%1

", {"%1": "subtitle"}, "", "", None), (u"para", frozenset([]), "plain"): (u"@para/plain", u"%1", {"%1": "para"}, "", "\n", None), (u"para", frozenset([]), "rich"): (u"@para/rich", u"

%1

", {"%1": "para"}, "", "", None), (u"list", frozenset([]), "plain"): (u"@list/plain", u"%1", {"%1": "list"}, "\n", "", None), (u"list", frozenset([]), "rich"): (u"@list/rich", u"
    %1
", {"%1": "list"}, "", "", None), (u"item", frozenset([]), "plain"): (u"@item/plain", u" * %1", {"%1": "item"}, "", "\n", None), (u"item", frozenset([]), "rich"): (u"@item/rich", u"
  • %1
  • ", {"%1": "item"}, "", "", None), (u"note", frozenset([]), "plain"): (u"@note/plain", u"Note: %1", {"%1": "note"}, "", "", None), (u"note", frozenset([]), "rich"): (u"@note/rich", u"Note: %1", {"%1": "note"}, "", "", None), (u"note", frozenset([u"label"]), "plain"): (u"@note-with-label/plain\n" u"%1 is the note label, %2 is the text", u"%1: %2", {"%1": "label", "%2": "note"}, "", "", None), (u"note", frozenset([u"label"]), "rich"): (u"@note-with-label/rich\n" u"%1 is the note label, %2 is the text", u"%1: %2", {"%1": "label", "%2": "note"}, "", "", None), (u"warning", frozenset([]), "plain"): (u"@warning/plain", u"WARNING: %1", {"%1": "warning"}, "", "", None), (u"warning", frozenset([]), "rich"): (u"@warning/rich", u"Warning: %1", {"%1": "warning"}, "", "", None), (u"warning", frozenset([u"label"]), "plain"): (u"@warning-with-label/plain\n" u"%1 is the warning label, %2 is the text", u"%1: %2", {"%1": "label", "%2": "warning"}, "", "", None), (u"warning", frozenset([u"label"]), "rich"): (u"@warning-with-label/rich\n" u"%1 is the warning label, %2 is the text", u"%1: %2", {"%1": "label", "%2": "warning"}, "", "", None), (u"link", frozenset([]), "plain"): (u"@link/plain", u"%1", {"%1": "link"}, "", "", None), (u"link", frozenset([]), "rich"): (u"@link/rich", u"%1", {"%1": "link"}, "", "", None), (u"link", frozenset([u"url"]), "plain"): (u"@link-with-description/plain\n" u"%1 is the URL, %2 is the descriptive text", u"%2 (%1)", {"%2": "link", "%1": "url"}, "", "", None), (u"link", frozenset([u"url"]), "rich"): (u"@link-with-description/rich\n" u"%1 is the URL, %2 is the descriptive text", u"%2", {"%2": "link", "%1": "url"}, "", "", None), (u"filename", frozenset([]), "plain"): (u"@filename/plain", u"‘%1’", {"%1": "filename"}, "", "", None), (u"filename", frozenset([]), "rich"): (u"@filename/rich", u"%1", {"%1": "filename"}, "", "", None), (u"application", frozenset([]), "plain"): (u"@application/plain", u"%1", {"%1": "application"}, "", "", None), 
(u"application", frozenset([]), "rich"): (u"@application/rich", u"%1", {"%1": "application"}, "", "", None), (u"command", frozenset([]), "plain"): (u"@command/plain", u"%1", {"%1": "command"}, "", "", None), (u"command", frozenset([]), "rich"): (u"@command/rich", u"%1", {"%1": "command"}, "", "", None), (u"command", frozenset([u"section"]), "plain"): (u"@command-with-section/plain\n" u"%1 is the command name, %2 is its man section", u"%1(%2)", {"%1": "command", "%2": "section"}, "", "", None), (u"command", frozenset([u"section"]), "rich"): (u"@command-with-section/rich\n" u"%1 is the command name, %2 is its man section", u"%1(%2)", {"%1": "command", "%2": "section"}, "", "", None), (u"resource", frozenset([]), "plain"): (u"@resource/plain", u"“%1”", {"%1": "resource"}, "", "", None), (u"resource", frozenset([]), "rich"): (u"@resource/rich", u"“%1”", {"%1": "resource"}, "", "", None), (u"icode", frozenset([]), "plain"): (u"@icode/plain", u"“%1”", {"%1": "icode"}, "", "", None), (u"icode", frozenset([]), "rich"): (u"@icode/rich", u"%1", {"%1": "icode"}, "", "", None), (u"bcode", frozenset([]), "plain"): (u"@bcode/plain", u"\n%1\n", {"%1": "bcode"}, "", "", None), (u"bcode", frozenset([]), "rich"): (u"@bcode/rich", u"
    %1
    ", {"%1": "bcode"}, "", "", None), (u"shortcut", frozenset([]), "plain"): (u"@shortcut/plain", u"%1", {"%1": "shortcut"}, "", "", textmod_shortcut), (u"shortcut", frozenset([]), "rich"): (u"@shortcut/rich", u"%1", {"%1": "shortcut"}, "", "", textmod_shortcut), (u"interface", frozenset([]), "plain"): (u"@interface/plain", u"|%1|", {"%1": "interface"}, "", "", textmod_interface), (u"interface", frozenset([]), "rich"): (u"@interface/rich", u"%1", {"%1": "interface"}, "", "", textmod_interface), (u"emphasis", frozenset([]), "plain"): (u"@emphasis/plain", u"*%1*", {"%1": "emphasis"}, "", "", None), (u"emphasis", frozenset([]), "rich"): (u"@emphasis/rich", u"%1", {"%1": "emphasis"}, "", "", None), (u"emphasis", frozenset([u"strong"]), "plain"): (u"@emphasis-strong/plain", u"**%1**", {"%1": "emphasis"}, "", "", None), (u"emphasis", frozenset([u"strong"]), "rich"): (u"@emphasis-strong/rich", u"%1", {"%1": "emphasis"}, "", "", None), (u"placeholder", frozenset([]), "plain"): (u"@placeholder/plain", u"<%1>", {"%1": "placeholder"}, "", "", None), (u"placeholder", frozenset([]), "rich"): (u"@placeholder/rich", u"<%1>", {"%1": "placeholder"}, "", "", None), (u"email", frozenset([]), "plain"): (u"@email/plain", u"<%1>", {"%1": "email"}, "", "", None), (u"email", frozenset([]), "rich"): (u"@email/rich", u"<%1>", {"%1": "email"}, "", "", None), (u"email", frozenset([u"address"]), "plain"): (u"@email-with-name/plain\n" u"%1 is name, %2 is address", u"%1 <%2>", {"%1": "email", "%2": "address"}, "", "", None), (u"email", frozenset([u"address"]), "rich"): (u"@email-with-name/rich\n" u"%1 is name, %2 is address", u"%1", {"%1": "email", "%2": "address"}, "", "", None), (u"envar", frozenset([]), "plain"): (u"@envar/plain", u"$%1", {"%1": "envar"}, "", "", None), (u"envar", frozenset([]), "rich"): (u"@envar/rich", u"$%1", {"%1": "envar"}, "", "", None), (u"message", frozenset([]), "plain"): (u"@message/plain", u"/%1/", {"%1": "message"}, "", "", None), (u"message", frozenset([]), 
"rich"): (u"@message/rich", u"%1", {"%1": "message"}, "", "", None), (u"nl", frozenset([]), "plain"): (u"@nl/plain", u"%1\n", {"%1": "nl"}, "", "", None), (u"nl", frozenset([]), "rich"): (u"@nl/rich", u"%1
    ", {"%1": "nl"}, "", "", None), } # Add patterns for term format, same as plain. for (tag, attrs, fmt), trspec in _kuit_transforms.items(): if fmt == "plain": _kuit_transforms[(tag, attrs, "term")] = trspec # Collect all known tags and formats. _kuit_tags = set() _known_formats = set() for (tag, attrs, fmt), trspec in _kuit_transforms.items(): _kuit_tags.add(tag) _known_formats.add(fmt) # Qt rich text tags (used for implicit determination of rich format). _html_tags = set([ "a", "address", "b", "big", "blockquote", "body", "br", "center", "cita", "code", "dd", "dfn", "div", "dl", "dt", "em", "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i", "img", "kbd", "meta", "li", "nobr", "ol", "p", "pre", "qt", "s", "samp", "small", "span", "strong", "sup", "sub", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "tt", "u", "ul", "var", ]) # Default target formats by context marker. # According to kuitsemantics.cpp from kdecore. _cmarker_to_format = { "@action": "plain", "@title": "plain", "@label": "plain", "@option": "plain", "@item": "plain", "@info": "rich", "@info:progress": "plain", "@info:status": "plain", "@info:credit": "plain", "@info:shell": "plain", } _top_tag_rx = re.compile(r"<\s*(qt|html)\b[^>]*>(.*)<\s*/\s*qt\s*>", re.U | re.S | re.I) def resolve_kuit (ctxt, text, quote, langdata, path, lno, toxi18n=0, forcerich=False): xi18n = False fmt_cm, fmt_rc, res_ctxt, has_cmarker = format_from_cmarker(ctxt, quote) if forcerich: fmt_cm = "rich" fmt_rc = "rich" if fmt_cm and fmt_cm not in _known_formats: warning("%s:%d: Unknown format modifier '%s' in context marker. " "The string will not be resolved until this is fixed." 
% (path, lno, fmt_cm)) has_any_html_tag = False has_any_kuit_tag = False return ctxt, text, xi18n, has_any_html_tag, has_any_kuit_tag if toxi18n in (1, 2) and fmt_cm != fmt_rc and not path.endswith(".po"): warning("%s:%d: Manual format modifier '%s' does not match " "the implicit format modifier '%s' based on context marker. " "Manual format modifiers are no longer supported, " "replace them with another format selection method." % (path, lno, fmt_cm, fmt_rc)) # Recover original context with modifier still inside. res_ctxt = ctxt fmt = fmt_cm or format_from_tags(text, quote) or "plain" ret = _resolve_kuit_r(text, quote, fmt, langdata, path, lno) res_text, has_any_kuit_tag, has_any_html_tag, has_top_tag = ret if (toxi18n == 1 and has_any_kuit_tag) or toxi18n == 2: if has_any_html_tag: warning("%s:%d: Mixed KUIT and HTML tags. " "This should be changed to all-KUIT tags." % (path, lno)) xi18n = True return res_ctxt, text, xi18n, has_any_html_tag, has_any_kuit_tag if fmt_cm != "rich" and not has_any_html_tag: ret = resolve_entities(res_text, path, lno) res_text, any_entity_resolved = ret else: any_entity_resolved = False if not has_cmarker and not has_any_kuit_tag and not any_entity_resolved: # In this case the resolution should have been no-op, # so return the original input just in case. return ctxt, text, xi18n, has_any_html_tag, has_any_kuit_tag if has_top_tag or fmt_cm == "rich": # What to do with top tag in rich text. # 0 - As in KUIT processing in kdecore. But this would cause # tags to appear in otherwise plain text which happens # to be sent to rich-text capable output. People may not like it. # (It would also cause that running resolution over already # resolved files leads to spurious additon of tags, # e.g. 1st resolution @info/plain -> @info and no tag, # 2nd resolution @info -> @info and tag.) # 1 - Original top tag is removed and then tag added only if # there is another tag or entity in the text. 
# 2 - Top tag is neither added nor removed, but left as it is # in the literal text. top_tag_res = 2 if top_tag_res in (0, 1): if has_top_tag: res_text = _top_tag_rx.sub(r"\2", res_text) if top_tag_res == 0 or ("<" in res_text or "&" in res_text): p1 = 0 p2 = len(res_text) if quote: p1 = res_text.find(quote) + len(quote) p2 = res_text.rfind(quote) res_text = ("%s%s%s" % (res_text[:p1], res_text[p1:p2], res_text[p2:])) elif top_tag_res == 2: pass else: raise StandardError( "Unknown top tag resolution choice '%d'." % top_tag_res) return res_ctxt, res_text, xi18n, has_any_html_tag, has_any_kuit_tag _element_rx = re.compile(r"<\s*(\w+)(?:([^>]*)>(.*?)<\s*/\s*\1|\s*/)\s*>", re.U | re.S) _attribute_rx = re.compile(r"""\b(\w+)\s*=\s*["'](.*?)["']""") def _resolve_kuit_r (text, quote, fmt, langdata, path, lno): segs = [] p1 = 0 has_any_kuit_tag = False has_any_html_tag = False has_top_tag = False while True: m = _element_rx.search(text, p1) if not m: segs.append(text[p1:]) break p2, p3 = m.span() segs.append(text[p1:p2]) tag, attrstr, etext = m.groups() if etext is None: in_place = True attrstr, etext = "", "" else: in_place = False ret = _resolve_kuit_r(etext, quote, fmt, langdata, path, lno) res_etext, has_any_kuit_tag_1, has_any_html_tag_1, has_top_tag_1 = ret has_any_html_tag = has_any_html_tag or has_any_html_tag_1 has_any_kuit_tag = has_any_kuit_tag or has_any_kuit_tag_1 res_span = text[p2:p3] # in case no other resolution if tag in _kuit_tags: has_any_kuit_tag = True attrmap = dict(_attribute_rx.findall(attrstr)) has_top_tag = has_top_tag or has_top_tag_1 trkey = (tag, frozenset(attrmap.keys()), fmt) tr = langdata.transform.get(trkey) if tr is not None: if tr.textmodf: res_etext = tr.textmodf(res_etext, quote, fmt, langdata) res_span = tr.pattern if quote: res_span = escape_c(res_span, quote) replmap = attrmap replmap[tag] = res_etext # Replace in one pass, because replacement might contain %N. 
p1a = 0 csegs = [] seen_pls = set() while True: p2a = res_span.find("%", p1a) if p2a < 0: csegs.append(res_span[p1a:]) break csegs.append(res_span[p1a:p2a]) if res_span[p2a + 1:p2a + 2].isdigit(): pl = res_span[p2a:p2a + 2] nm = tr.subsmap[pl] cseg = replmap[nm] # cannot fail if quote and pl in seen_pls: # If placeholder was already replaced once, # further replacements have to eliminate # masking chars and quotes, because # total number of masking chars must not change. cseg = join_quoted(cseg, quote, invert=True, strip=True) seen_pls.add(pl) csegs.append(cseg) p1a = p2a + 2 else: csegs.append("%") p1a = p2a + 1 res_span = "".join(csegs) res_span = tr.prepend + res_span + tr.postpend else: warning("%s:%d: No transformation for tag '%s' and format '%s'." % (path, lno, tag, fmt)) elif tag == "numid": has_any_kuit_tag = True # What to do with numid tag. # 0 - Simply remove numid tag, with a warning to manually convert # associated argument into digit string. # 1 - Modify all placeholders in the text wrapped with numid # to %I form, which indicates numeric identifier formatting. numid_tag_res = 0 if numid_tag_res == 0: if not path.endswith((".po", ".pot")): warning("%s:%d: A '%s' tag has been removed, do something " "manually with the affected argument " "(e.g. wrap it in QString::number())." % (path, lno, tag)) res_span = res_etext elif numid_tag_res == 1: nisegs = [] p1b = 0 while True: p2b = res_etext.find("%", p1b) if p2b < 0: nisegs.append(res_etext[p1b:]) break nisegs.append(res_etext[p1b:p2b]) if res_etext[p2b + 1:p2b + 2].isdigit(): p3b = p2b + 1 while p3b < len(res_etext) and res_etext[p3b].isdigit(): p3b += 1 nisegs.append("%I" + res_etext[p2b + 1:p3b]) p1b = p3b else: nisegs.append("%") p1b += 1 res_span = "".join(nisegs) else: raise StandardError( "Unknown '%s' tag resolution choice '%d'." 
% ("numid", numid_tag_res)) elif tag in _html_tags: has_any_html_tag = True if tag.lower() in ("qt", "html"): has_top_tag = True if not in_place: res_span = "<%s%s>%s" % (tag, attrstr, res_etext, tag) segs.append(res_span) p1 = p3 res_text = "".join(segs) return res_text, has_any_kuit_tag, has_any_html_tag, has_top_tag _entity_rx = re.compile(r"&([a-z]+|#[0-9]+|#x[0-9a-fA-F]+);", re.U | re.S) _xml_entities = { "lt": "<", "gt": ">", "amp": "&", "apos": "'", "quot": "\"", } def resolve_entities (text, path, lno): any_entity_resolved = False segs = [] p1 = 0 while True: m = _entity_rx.search(text, p1) if not m: segs.append(text[p1:]) break p2, p3 = m.span() segs.append(text[p1:p2]) span = text[p2:p3] ent = m.group(1) if ent.startswith("#"): # numeric character try: if ent[1] == "x": c = unichr(int(ent[2:], 16)) else: c = unichr(int(ent[1:], 10)) except: warning("%s:%d: Invalid numeric XML entity '%s'." % (path, lno, ent)) segs.append(c) any_entity_resolved = True elif ent in _xml_entities: segs.append(_xml_entities[ent]) any_entity_resolved = True else: # Don't warn, may be some HTML entity. 
segs.append(span) p1 = p3 res_text = "".join(segs) return res_text, any_entity_resolved _cmarker_rx = re.compile(r"@(\w+):?(\w+)?/?(\w+)?", re.U | re.S) def format_from_cmarker (ctxt, quote): fmt = None fmt_rc = None res_ctxt = ctxt has_cmarker = False if ctxt is not None: p1 = 0 if quote: p1 = ctxt.find(quote) + len(quote) m = _cmarker_rx.match(ctxt, p1) if m: has_cmarker = True role, cue, fmt = m.groups() if role and cue: # implicit format by role and cue fmt_rc = _cmarker_to_format.get("@%s:%s" % (role, cue)) if not fmt_rc: # implicit format by role alone fmt_rc = _cmarker_to_format.get("@%s" % role) if fmt: # explicit format modifier p2 = ctxt.find("/", p1) res_ctxt = ctxt[:p2] + ctxt[p2 + 1 + len(fmt):] else: fmt = fmt_rc return fmt, fmt_rc, res_ctxt, has_cmarker _opentag_rx = re.compile(r"<\s*(\w+)[^>]*>", re.U | re.S) def format_from_tags (text, quote): fmt = None for tag in _opentag_rx.findall(text): if tag in _html_tags: fmt = "rich" break return fmt def escape_c (text, quote): text = text.replace("\\", "\\\\") # must be first if quote: text = text.replace(quote, "\\" + quote) text = text.replace("\t", "\\t") text = text.replace("\n", "\\n") return text def join_quoted (s, quote, invert=False, strip=False): segs1 = [] segs2 = [] p1 = 0 l = len(s) lq = len(quote) while True: p2 = find_esc(s, quote, "\\", p1) if p2 < 0: segs2.append(s[p1:]) break segs2.append(s[p1:p2]) p2 += len(quote) p3 = find_skip_esc(s, quote, "\\", p2) if p3 < 0: raise StandardError( "Malformed concatenated string literal '%s'." 
% s) segs1.append(s[p2:p3]) p1 = p3 + len(quote) js1 = "".join(segs1) js2 = "".join(segs2) js = js1 if not invert else js2 if not strip: js = quote + js + quote return js def find_esc (s, f, e, p=0): ls = len(s) le = len(e) while p < ls: if s.startswith(e, p): p += le + 1 elif s.startswith(f, p): break else: p += 1 if p >= ls: p = -1 return p _xml_entities_escape_ordered = [ ("&", "&"), # must be first ("<", "<"), (">", ">"), ("\"", """), ("'", "'"), ] _xml_entities_unescape_ordered = [ tuple(reversed(x)) for x in reversed(_xml_entities_escape_ordered)] def unescape_xml (es, testnoesc=False): s = es if testnoesc: noesc = set() for ent, val in _xml_entities_unescape_ordered: if testnoesc: p = s.find(val) if p >= 0 and not s.startswith(ent, p): # for & -> & noesc.add(ent) s = s.replace(ent, val) if testnoesc: return s, noesc else: return s def escape_xml (s, noesc=None): es = s for val, ent in _xml_entities_escape_ordered: if not noesc or ent not in noesc: es = es.replace(val, ent) return es if __name__ == "__main__": main() diff --git a/relicensecheck.pl b/relicensecheck.pl index d961ee8..f6004ea 100755 --- a/relicensecheck.pl +++ b/relicensecheck.pl @@ -1,697 +1,697 @@ #!/usr/bin/perl -w # vim:sw=4:et # (c) Dirk Mueller. GPLv2+ # I would love to be a python script, but os.popen just sucks use strict; use List::Util qw(any); ### Please add your KDE (svn/git) account name in *alphabetical* order to the list ### below, then answer the following questions: ### ### 1. Include 'gplv23' if you are okay with contributions you've made under ### "GPLv2" being relicensed as "GPLv2 or GPLv3". ### ### 2. Include 'lgplv23' if you are okay with contributions you've made under ### "LGPLv2" being relicensed as "LGPLv2 or LGPLv3". ### ### 3. Include 'gplv2+' if you are okay with contributions you've made under ### "GPLv2" being relicensed as "GPLv2 or later". ### ### 4. 
Include 'lgplv2+' if you are okay with contributions you've made under ### "LGPLv2" being relicensed as "LGPLv2 or later". ### ### 5. Include '+eV' if you are okay with the KDE e.V. deciding on a future ### licensing change to your code if necessary. ### ### 5. Include 'CCBYSA4+' if you are okay with contributions you've made under ### "GNU FDL" being relicensed as "Creative Commons Attribution-ShareAlike 4.0 International". -### For more information, see http://techbase.kde.org/Projects/KDE_Relicensing +### For more information, see https://community.kde.org/Guidelines_and_HOWTOs/Relicensing/KDE_Relicensing my %license_table = ( 'acrouthamel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'aheinecke' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'arichardson' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'athurhfree' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'cgerloff' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'davidre' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'emmanuelp' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'flherne' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'huoni' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'jpoelen' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'jriddell' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'kezik' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'kleag' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', 'CCBYSA4' ], 'lnj' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'ltoscano' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'lueck' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'meven' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'michelh' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' 
], 'muhlenpfordt' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'ngraham' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'nicolasfella' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'notmart' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'ostroffjh' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'rkflx' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'safaalfulaij' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'sitter' => ['CCBYSA4'], 'sredman' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'sstjames' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'thomassc' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ], 'vladz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV', 'CCBYSA4' ] ); my %old_license_table_2 = ( # From before CCBYSA4 was added, if you get an update for one of these people move it to %license_table 'aacid' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'abryant' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'adawit' => ['gplv23', 'lgplv23', '+eV' ], 'ademko' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'adiaferia' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'afiestas' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'alexmerry' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'alund' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'amantia' => ['gplv23', 'lgplv23' , '+eV' ], 'amth' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'antlarr' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'apol' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'arnolddumas' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'asensi' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'aseigo' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'asserhal' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', 
'+eV' ], 'beaulen' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bensi' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'beschow' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bgupta' => [ 'gplv2+', 'lgplv2+', '+eV' ], 'bhards' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bieker' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bischoff' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bks' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'blackie' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bport' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bram' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'broulik' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bruggie' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'bshah' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'capel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'carewolf' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cfeck' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'cgiboudeaux' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'chani' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'chehrlic' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'clee' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'cmollekopf' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'coates' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'codrea' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cordlandwehr' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'craig' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'cramblitt' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cschumac' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ctennis' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'cullmann' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'dakon' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'danimo' => ['gplv23', 'lgplv23', 
'gplv2+', 'lgplv2+', '+eV' ], 'dannya' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'deller' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'denis' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'deniskuplyakov'=> ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'dfaure' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'dhaumann' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'dherberth' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'domi' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'djarvie' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'dyp' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'dvratil' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'egorov' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ehamberg' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'eliasp' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'elvisangelaccio'=>['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'epignet' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ereslibre' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'eros' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ervin' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'eschepers' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', ], 'eva' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'fabiank' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'fawcett' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'fengchao' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'fischer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'fizz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'flocati' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'fujioka' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'fux' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'garbanzo' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'gateau' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', 
'+eV' ], 'geralds' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'giannaros' => ['gplv23', 'lgplv23' ], 'gioele' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'graesslin' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'granroth' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'gregormi' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'groszdaniel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'grulich' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'guymaurel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'haeber' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'haeckel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', ], 'harris' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'hausmann' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'heikobecker' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', ], 'hdhoang' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'hindenburg' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'hoelzer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'hrvojes' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'hubner' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'huerlimann' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'huftis' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ilic' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ikomissarov' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ivan' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jbrouault' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jehrichs' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jekyllwu' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jlee' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'johnflux' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jones' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jowenn' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 
'jschroeder' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'jtamate' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'kainhofe' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'kfunk' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'kloecker' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'knight' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'knauss' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'kossebau' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'kylafas' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'lbeltrame' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'leinir' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'leonh' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'leonhard' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'lilachaze' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'lliehu' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'lvsouza' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'lypanov' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'majewsky' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mardelle' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'martyn' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mbritton' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mbroadst' => ['gplv23', 'lgplv23' , '+eV' ], 'mecir' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'michaelhowell' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'michalhumpula' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'milliams' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mirko' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mkoller' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mlaurent' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mludwig' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mmrozowski' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 
'mpyne' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mssola' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'mueller' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mwolff' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'nalvarez' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'narvaez' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'nhasan' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'nikitas' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'nsams' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'ogoffart' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'orcsik' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'palant' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'palimaka' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'pdamsten' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'peifengyu' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'pgquiles' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'pino' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'pletourn' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'pupeno' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'raabe' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'rahn' => ['gplv23', 'lgplv23' , '+eV' ], 'ralfjung' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ralsina' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'rdale' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'reiher' => ['gplv23', 'lgplv23', '+eV' ], 'rich' => ['gplv23', 'lgplv23' , '+eV' ], 'richih' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'rkcosta' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'robbilla' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'romariorios' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'rpreukschas' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'rthomsen' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'ruedigergad' => ['gplv23', 
'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'sanders' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'sandsmark' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'sars' => ['gplv23', 'lgplv23', '+eV' ], 'saschpe' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'savernik' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'scarpino' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'schmeisser' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'schroder' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'schwarzer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'sebas' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'skelly' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'smartins' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'sping' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'staikos' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'staniek' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'sune' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'taj' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'tenharmsel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'teske' => ['gplv23', 'lgplv23', ], 'tfry' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'thiago' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'tjansen' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'tmcguire' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'tnyblom' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'treat' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'turbov' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'uga' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'uwolfer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'vandenoever' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'vhanda' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'vitters' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'vkrause' => ['gplv23', 
'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'vonreth' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'vrusu' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'waba' => ['gplv23', 'lgplv23', '+eV' ], 'wheeler' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'whiting' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'willy' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'woebbe' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'wstephens' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'zack' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ], 'zecke' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+', '+eV' ] # Do not add anything here. Use license_table instead. ); my %old_license_table = ( ### below is the older table -- from before we offered the +eV option. ### This means that in theory some of these contributors might accept ### to add the +eV if we ask them nicely. If they refuse, move the line ### to the above part of the table so that we don't ask them again. 
'adridg' => ['gplv23', 'lgplv23' ], 'ahartmetz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'annma' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'apaku' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'arendjr' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'aumuell' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'bbroeksema' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'binner' => ['gplv23', 'lgplv23' ], 'bjacob' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'bmeyer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'boemann' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'borgese' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'braxton' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'bvirlet' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cartman' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cconnell' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'charles' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cies' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cniehaus' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'coolo' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'cschlaeg' => ['gplv23', 'lgplv23' ], 'dimsuz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'djurban' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'dmacvicar' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'dymo' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'edghill' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'emmott' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'espen' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'fela' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'fredrik' => ['gplv23', 'lgplv23' ], 'gladhorn' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'gogolok' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'goossens' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'gyurco' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'harald' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'hedlund' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'helio' 
=> ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'howells' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'hschaefer' => ['gplv23' ], 'ingwa' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'isaac' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'jens' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'jlayt' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'johach' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'krake' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'laidig' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'lunakl' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'lure' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'marchand' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mattr' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mcamen' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'menard' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mfranz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mhunter' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'micron' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mkretz' => ['gplv23', 'lgplv23' ], 'mlarouche' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mm' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mrudolf' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'msoeken' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mstocker' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mutz' => [ 'gplv2+', 'lgplv2+' ], 'mvaldenegro' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'mwoehlke' => ['gplv23', 'lgplv23' ], 'nielsslot' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'okellogg' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'onurf' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'orzel' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'ossi' => [ 'gplv2+', 'lgplv2+' ], 'osterfeld' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'pfeiffer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'piacentini' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'pitagora' => ['gplv23', 'lgplv23', 'gplv2+', 
'lgplv2+' ], 'ppenz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'pstirnweiss' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'putzer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'pvicente' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'quique' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'raggi' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'rempt' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'rjarosz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'rodda' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'roffet' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'rohanpm' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'sebsauer' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'shaforo' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'shipley' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'silberstorff' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'thorbenk' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'tilladam' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'tokoe' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'toma' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'troeder' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'trueg' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], # NOTE: except k3b 'wgreven' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'winterz' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'zachmann' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ], 'zander' => ['gplv23', 'lgplv23', 'gplv2+', 'lgplv2+' ] # Do not add anything here. Use license_table instead. 
);

# Extra e-mail addresses used in commits by developers who already have an
# entry (under their account name) in the license tables above.
my %secondary_mail_addresses = (
    '13thguards@gmail.com' => 'isemenov',
    'ABBAPOH@me.com' => 'ikomissarov',
    'aetf@unlimitedcodeworks.xyz' => 'peifengyu',
    'aleixpol@kde.org' => 'apol',
    'asmond@gmx.com' => 'asensi',
    'athurh@gmail.com' => 'athurhfree',
    'bero@arklinux.org' => 'bero',
    'caleb@gentoo.org' => 'ctennis',
    'christoph@maxiom.de' => 'cfeck',
    'devel@the-user.org' => 'schmidt-domine',
    'devriese@kde.org' => 'domi',
    'ervin+bluesystems@kde.org' => 'ervin',
    'faure+bluesystems@kde.org' => 'dfaure',
    'git@the-user.org' => 'schmidt-domine',
    'guy.maurel@kde.org' => 'guymaurel',
    'john@layt.net' => 'jlayt',
    'kde@randomguy3.me.uk' => 'alexmerry',
    'kde@rusu.info' => 'valir',
    'keziolio123@gmail.com' => 'kezik',
    'kubito@gmail.com' => 'rkcosta',
    'lamarque@gmail.com' => 'lvsouza',
    'malte@kde.org' => 'malte',
    'mk-lists@email.de' => 'kaning',
    'neoclust.kde@free.fr' => 'nlecureuil',
    'olav@vitters.nl' => 'vitters',
    'richard@goteborg.utfors.se' => 'larkang',
    'sune@vuorela.dk' => 'sune',
    'thb@net-bembel.de' => 'tbaumgart',
    'trev@adblockplus.org' => 'palant',
    'tsdgeos@terra.es' => 'aacid',
);

my %ruletable;        # license name -> { account => 1 } for accounts that agreed
my %blacklist;        # license name -> { account => [revisions] } for those who did not
my %whitelist;        # license name -> { account => [revisions] } for those who did
my %unknown_authors;  # commit e-mails we could not map to an account
my @blacklist_revs;

# Fold the two historical tables into %license_table, refusing duplicates.
# (%old_license_table_2 is declared earlier in the file, above this chunk.)
foreach my $who (keys %old_license_table) {
    die "$who in both tables" if defined $license_table{$who};
    $license_table{$who} = $old_license_table{$who};
}
foreach my $who (keys %old_license_table_2) {
    die "$who in both tables" if defined $license_table{$who};
    $license_table{$who} = $old_license_table_2{$who};
}

# Invert the merged table: for each license, the set of accounts that agreed.
foreach my $who (keys %license_table) {
    foreach my $license (@{$license_table{$who}}) {
        $ruletable{$license}->{$who} = 1;
    }
}

# Read kde-common/accounts for email->name mapping.
# The config file contains the path to the accounts file; '#' lines are comments.
my $configfile = $ENV{HOME} . "/.config/KDE/relicensecheck.conf";
open(CONFIG, $configfile) or die "Please write the path to kde-common/accounts in $configfile";
my $accountfile;
# NOTE(review): the <CONFIG> read was restored here; the angle-bracket
# filehandle reads were stripped when this file's formatting was mangled.
while (<CONFIG>) {
    if (not /^#/) {
        chomp;
        $accountfile = $_;
    }
}
close CONFIG;
defined $accountfile or die "Please write the path to kde-common/accounts in $configfile";

my %authors = ();     # e-mail -> account name
my %authornames = (); # account name -> full name

# Parse one accounts file ("nick name email" per line) into the maps above.
sub parseAccountsFile($) {
    my ($accountfile) = @_;
    open(ACCOUNTS, $accountfile) || die "Account file not found: $accountfile";
    while (<ACCOUNTS>) {
        # The format is nick name email.
        if (/([^\s]*)\s+([^\s].*[^\s])\s+([^\s]+)/) {
            $authors{$3} = "$1";
            $authornames{$1} = "$2";
        }
        #elsif (/([^\s]*)\s+([^\s]*)/) {
        #    $authors{$1} = $2;
        #}
        else {
            die "$accountfile: couldn't parse $_";
        }
    }
    close ACCOUNTS;
}

if ($accountfile) {
    parseAccountsFile($accountfile);
    # Also read the "disabled accounts" file
    my $disabledaccountsfile = $accountfile;
    $disabledaccountsfile =~ s/accounts$/disabled-accounts/;
    die "I expected this to end with 'accounts': $accountfile"
        if ($accountfile eq $disabledaccountsfile);
    parseAccountsFile($disabledaccountsfile);
}

# Map a commit e-mail to an account name, trying the accounts file first and
# then the secondary-address table; unknown addresses are remembered for the
# final report and returned unchanged.
sub resolveEmail($) {
    my ($email) = @_;
    my $resolved = $authors{$email};
    if (not defined $resolved) {
        $resolved = $secondary_mail_addresses{$email};
    }
    if (not defined $resolved) {
        $unknown_authors{$email} = 1;
        return $email;
    }
    return $resolved;
}

# Authors whose commits carry no relicensing-relevant copyright
# (automation and placeholder identities).
sub skipCommitByAuthor($) {
    my ($author) = @_;
    return ($author eq "scripty" or
            $author eq "(no" or
            $author eq "nobody\@localhost" or
            $author eq "not.committed.yet" or
            $author eq "null\@kde.org");
}

sub usage() {
    # NOTE(review): heredoc line breaks and the stripped "<file>" were
    # reconstructed after the formatting loss.
    print << "EOM";
Usage: relicensecheck.pl file
    Output information on relicensing possibilities for <file>

relicensecheck.pl -g
relicensecheck.pl --generate-wiki
    Generate the table for the wiki page
EOM
}

my $generate_wiki = 0;
my @arguments;

# Split the command line into options (handled here) and positional @arguments.
sub parse_arguments(@) {
    while (scalar @_) {
        my $arg = shift @_;
        if ($arg eq "-g" || $arg eq "--generate-wiki") {
            $generate_wiki = 1;
        } elsif ($arg eq "-?" || $arg eq "--?" || $arg eq "-h" || $arg eq "--help") {
            usage();
            exit 0;
        } elsif ($arg eq "--") {
            push @arguments, @_;
            return;
        } else {
            push @arguments, $arg;
        }
    }
}

parse_arguments(@ARGV);

if ($generate_wiki) {
    # Emit a MediaWiki table with one row per contributor.
    print "{| border=\"1\"\n";
    print "! Name !! GPLv2->GPLv2+ !! LGPLv2 -> LGPLv2+ !! GPLv2 -> GPLv2+v3 !! LGPLv2 -> LGPLv2+LGPLv3 !! KDE e.V. decides !! FDL -> CC-BY-SA 4.0 \n";
    print "|-\n";
    my @lines = ();
    foreach my $who (keys %license_table) {
        if (!defined $authornames{$who}) {
            die "ERROR: unknown author $who\n";
        }
        # Example: print "|Adam, Till || YES || YES || YES || YES || NO\n";
        my @licenses = @{$license_table{$who}};
        my %licensesHash = map { $_ => 1 } @licenses;
        my $gplv23 = exists($licensesHash{'gplv23'}) ? "YES" : "NO";
        my $lgplv23 = exists($licensesHash{'lgplv23'}) ? "YES" : "NO";
        my $gplv2plus = exists($licensesHash{'gplv2+'}) ? "YES" : "NO";
        my $lgplv2plus = exists($licensesHash{'lgplv2+'}) ? "YES" : "NO";
        my $eV = exists($licensesHash{'+eV'}) ? "YES" : "NO";
        # Contributors from before an option existed get an empty cell,
        # not "NO" -- they were simply never asked.
        $eV = "" if (exists $old_license_table{$who});
        my $ccbysa4 = exists($licensesHash{'CCBYSA4'}) ? "YES" : "NO";
        $ccbysa4 = "" if (exists $old_license_table_2{$who} || exists $old_license_table{$who});
        push @lines, "|$authornames{$who} || $gplv2plus || $lgplv2plus || $gplv23 || $lgplv23 || $eV || $ccbysa4\n";
    }
    use locale;  # locale-aware sort of the contributor names
    foreach my $line (sort @lines) {
        print $line;
        print "|-\n";
    }
    print "|}\n";
    exit 0;
}

my $file = $arguments[0] || "";
die "need existing file: $file" if (! -r $file);

# Stream the revision log of $file through the IN filehandle.
my $svn = (-d ".svn");
if ($svn) {
    open(IN, "-|") || exec 'svn', 'log', '-q', $file;
} else {
    # Format the git output to match the format of svn log.
open(IN, "-|") || exec 'git', 'log', '--follow', '--abbrev-commit',
    '--pretty=format:r%h | %ae ', $file;
}

# Walk the revision history: for every commit, record per license whether the
# author has agreed to it (whitelist) or not (blacklist).
# NOTE(review): the <IN> reads in this section were restored; the angle-bracket
# filehandle reads were stripped when this file's formatting was mangled.
while (<IN>) {
    if (/^r(\S+) \| (\S+) /) {
        my ($rev, $author) = ($1, $2);
        #print STDERR "rev=$rev author=$author\n";
        next if skipCommitByAuthor($author);
        if (not $svn) {
            # Resolve email to account name
            $author = resolveEmail($author);
        }
        foreach my $license (keys %ruletable) {
            if (!defined($ruletable{$license}->{$author})) {
                push(@{$blacklist{$license}->{$author}}, $rev);
            } else {
                push(@{$whitelist{$license}->{$author}}, $rev);
            }
        }
    }
}
close(IN);

# Count the lines of code each author still owns in the current revision,
# via svn annotate / git blame.
my %loc_author = ();
if (-f $file) {
    if ($svn) {
        open(IN, "-|") || exec 'svn', 'ann', '-x', '-w', $file;
        while (<IN>) {
            my ($author) = (split)[1];
            $loc_author{$author}++;
        }
        close(IN);
    } else {
        open(IN, "-|") || exec 'git', 'blame', '-f', '-w', '-e', $file;
        while (<IN>) {
            # The format is:
            # b061712b kdecore/klockfile.cpp (<email> [...]
            if (m/^(\S+) (\S+) +\(<([^>]+)>/) {
                my ($author) = $3;
                next if skipCommitByAuthor($author);
                $author = resolveEmail($author);
                $loc_author{$author}++;
            } else {
                print STDERR "Parse error on git blame output: $_";
            }
        }
        close(IN);
    }
}

if (%unknown_authors) {
    print "The following emails do not appear in the accounts file:\n\n";
    foreach my $who (keys %unknown_authors) {
        print "$who\n";
    }
    print "\n";
}

# BUGFIX: this used to read "if (defined (keys %blacklist))", which is always
# true (defined() of the scalar key count), so the section printed even when
# there was nothing to report. A plain hash truth test matches the
# "if (%unknown_authors)" idiom used above.
if (%blacklist) {
    print "Need permission for licensing:\n\n";
    my %stat;
    foreach my $license (keys %blacklist) {
        print "- $license: " . join(' ', (keys %{$blacklist{$license}})) . "\n";
        foreach my $who (keys %{$blacklist{$license}}) {
            # Only detail authors who still own lines in the file; the summary
            # counts commits from those authors only.
            next if not defined $loc_author{$who};
            $stat{$license} += scalar(@{$blacklist{$license}->{$who}});
            printf "%9s (%4d LOC): %s \n", $who, $loc_author{$who} || 0,
                   join(",", @{$blacklist{$license}->{$who}});
        }
        print "\n";
    }
    print "\n";
    print "Summary:\n";
    foreach my $license (sort { $stat{$a} <=> $stat{$b} } keys %stat) {
        printf "%5d commits preventing relicensing to %s\n", $stat{$license}, $license;
    }
}

# A license change is allowed when at least one author agreed to it and
# nobody in the history refused it.
my @allowed_list = ();
if (%whitelist) {
    foreach my $license (keys %whitelist) {
        next if defined($blacklist{$license});
        push(@allowed_list, $license);
    }
}

if (@allowed_list) {
    print "\nRelicensing allowed: " . join(' ', @allowed_list) . "\n";
}

print "\nDo not forget to check copyright headers and for patches committed in the name of others!\n";