diff --git a/scripts/create_rules_from_koffice_autocorrect.py b/scripts/create_rules_from_koffice_autocorrect.py
index aa443592..7c3fa1ca 100755
--- a/scripts/create_rules_from_koffice_autocorrect.py
+++ b/scripts/create_rules_from_koffice_autocorrect.py
@@ -1,57 +1,57 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
# -*- coding: UTF-8 -*-

"""
Create Pology rules from the KOffice KWord autocorrect XML file.

This script is intended to be run standalone.

Usage::
    python create_rules_from_koffice_autocorrect.py <autocorrect-file> <rule-file>

@author: Sébastien Renard
@license: GPLv3
"""

import re
import sys
from codecs import open
import locale

def main():
    locale.setlocale(locale.LC_ALL, "")

    if len(sys.argv)!=3:
        usage()

    #TODO: check file is readable
    kofficeFile=open(sys.argv[1], "r", "utf-8")
    #TODO: check path is writable
    ruleFile=open(sys.argv[2], "w", "utf-8")

    # Regexp to find autocorrect items
    regexp=re.compile('<item find="(.*?)" replace="(.*?)" />')

    #Header
    ruleFile.write("# Generated rules from KOffice autocorrect file\n")
    ruleFile.write("# by the KOffice project (http://www.koffice.org)\n")
    ruleFile.write("# License: GPLv3\n\n")

    #TODO: exceptions should be in a separate file, not hardcoded.
    exceptions=["http:/", "http://", "etc...", "language"]

    for line in kofficeFile:
        match=regexp.match(line.strip())
        if match:
            find=match.group(1)
            replace=match.group(2)
            if find not in exceptions:
                ruleFile.write(u'[&lwb;%s&rwb;]\nhint="%s => %s (d\'après le fichier de correction de KOffice)"\n\n'
                               % (find, find, replace))

    #Footer
    ruleFile.write("\n#End of rule file\n")
    ruleFile.close()

def usage():
    print "\t%s <autocorrect-file> <rule-file>" % sys.argv[0]
    sys.exit(1)

if __name__ == '__main__':
    main()
diff --git a/scripts/normalize-aspell-word-list.py b/scripts/normalize-aspell-word-list.py
index 1f94da35..d0ef9652 100755
--- a/scripts/normalize-aspell-word-list.py
+++ b/scripts/normalize-aspell-word-list.py
@@ -1,136 +1,136 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
# -*- coding: UTF-8 -*-

"""
Organize dictionary file:
  - sort entries
  - remove duplicates
  - update header

This script is intended to be run standalone.

Usage::
    python normalize-aspell-word-list.py [-r|--remove-invalid] DICTFILE...

@author: Sébastien Renard
@license: GPLv3
"""

import locale
from codecs import open
from os.path import abspath, basename
import re
import sys

try:
    import fallback_import_paths
except:
    pass

from pology import _, n_
from pology.report import report, error

def main():
    locale.setlocale(locale.LC_ALL, "")

    # FIXME: Use pology.colors.ColorOptionParser.
    reminv=False
    paths=[]
    for arg in sys.argv[1:]:
        if arg.startswith("-"):
            if arg in ("-r", "--remove-invalid"):
                reminv = True
            else:
                error(_("@info",
                        "Unknown option '%(opt)s' in command line.",
                        opt=arg))
        else:
            paths.append(arg)
    if len(paths)<1:
        usage()

    for path in paths:
        organize(path, reminv)

def organize (dictPath, reminv=False):

    report(dictPath)

    dictEncDefault = "UTF-8"
    dictFile=open(dictPath, "r", dictEncDefault)

    # Parse the header for language and encoding.
    header=dictFile.readline()
    m=re.search(r"^(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s*", header)
    if not m:
        error(_("@info",
                "Malformed header of the dictionary file '%(file)s'.",
                file=dictPath))
    dictType, dictLang, numWords, dictEnc=m.groups()
    expDictType = "personal_ws-1.1"
    if dictType != expDictType:
        dictType = expDictType
        report("  " + _("@item:inlist",
                        "dictionary type changed to '%(dtype)s'",
                        dtype=expDictType))

    # Reopen in correct encoding if not the default.
    if dictEnc.lower() != dictEncDefault.lower():
        dictFile.close()
        dictFile=open(dictPath, "r", dictEnc)
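    # Editor's note (illustration, not part of the original script): the
    # header handled above is the first line of an Aspell personal word
    # list, of the form "<type> <language> <word count> <encoding>", e.g.
    # "personal_ws-1.1 fr 1234 UTF-8". A minimal standalone sketch of the
    # parse, runnable on its own (the header string here is hypothetical):
    #
    #   import re
    #   m = re.search(r"^(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s*",
    #                 "personal_ws-1.1 fr 1234 UTF-8")
    #   assert m.groups() == ("personal_ws-1.1", "fr", "1234", "UTF-8")

    # Read all words and eliminate duplicates.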
words=set() validCharacters=re.compile(ur"^[\w\d\'・-]+$", re.UNICODE) lno = 0 for word in dictFile: lno += 1 word=word.strip() if not word or word.startswith("personal_ws"): continue if word in words: report(" " + _("@item:inlist", "duplicate removed: %(word)s", word=word)) elif not validCharacters.match(word): if not reminv: report(" " + _("@item:inlist", "*** invalid word at %(line)s: %(word)s", line=lno, word=word)) words.add(word) else: report(" " + _("@item:inlist", "invalid word removed: %(word)s", word=word)) else: words.add(word) dictFile.close() words=list(words) numWords=len(words) # Sort the list according to current locale, ignoring case. words.sort(lambda x, y: locale.strcoll(x.lower(), y.lower())) # Write back the updated dictionary. dictFile=open(dictPath, "w", dictEnc) dictFile.write("%s %s %d %s\n" % (dictType, dictLang, numWords, dictEnc)) dictFile.write("\n".join(words)) dictFile.write("\n") dictFile.close() report(" " + n_("@item:inlist", "written %(num)d word", "written %(num)d words", num=len(words))) def usage(): report(_("@info", "Usage: %(cmd)s [-r|--remove-invalid] DICTFILE...", cmd=basename(sys.argv[0]))) sys.exit(1) if __name__ == '__main__': main() diff --git a/scripts/poascribe.py b/scripts/poascribe.py index e84579b0..fcb20815 100755 --- a/scripts/poascribe.py +++ b/scripts/poascribe.py @@ -1,1526 +1,1526 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- try: import fallback_import_paths except: pass import datetime import locale import os import re import sys from tempfile import NamedTemporaryFile import time from pology import PologyError, version, _, n_, t_ from pology.ascript import collect_ascription_associations from pology.ascript import collect_ascription_history from pology.ascript import collect_ascription_history_segment from pology.ascript import ascription_equal, merge_modified from pology.ascript import ascribe_modification, ascribe_review from pology.ascript import first_non_fuzzy, has_tracked_parts from pology.ascript import make_ascription_selector from pology.ascript import AscPoint from pology.catalog import Catalog from pology.header import Header, TZInfo, format_datetime from pology.message import Message, MessageUnsafe from pology.gtxtools import msgfmt from pology.colors import ColorOptionParser, cjoin import pology.config as pology_config from pology.diff import msg_ediff, msg_ediff_to_new from pology.diff import editprob from pology.fsops import str_to_unicode, unicode_to_str from pology.fsops import collect_paths_cmdline, collect_catalogs from pology.fsops import mkdirpath, join_ncwd from pology.fsops import exit_on_exception from pology.getfunc import get_hook_ireq from pology.merge import merge_pofile from pology.monitored import Monlist from pology.msgreport import warning_on_msg, report_msg_content from pology.msgreport import report_msg_to_lokalize from pology.report import report, error, format_item_list from pology.report import init_file_progress from pology.stdcmdopt import add_cmdopt_incexc, add_cmdopt_filesfrom from pology.tabulate import tabulate # Wrapping in ascription catalogs. _ascwrapping = ["fine"] # Flag used to mark diffed messages. # NOTE: All diff flags should start with 'ediff', as some other scripts # only need to check if any of them is present. _diffflag = u"ediff" _diffflag_tot = u"ediff-total" _diffflag_ign = u"ediff-ignored" # Flags used to explicitly mark messages as reviewed or unreviewed. 
_revdflags = (u"reviewed", u"revd", u"rev") # synonyms _urevdflags = (u"unreviewed", u"nrevd", u"nrev") # synonyms # Comment used to show ascription chain in messages marked for review. _achncmnt = "~ascto:" # String used to separate tags to review flags. _flagtagsep = "/" _diffflags = (_diffflag, _diffflag_tot, _diffflag_ign) _all_flags = _diffflags + _revdflags + _urevdflags _all_flags = sorted(_all_flags, key=lambda x: (-len(x), x)) # ...this order is necessary for proper |-linking in regexes. _all_cmnts = (_achncmnt,) # Datetime at the moment the script is started. _dt_start = datetime.datetime(*(time.localtime()[:6] + (0, TZInfo()))) def main (): locale.setlocale(locale.LC_ALL, "") mode_spec = ( ("status", ("st",)), ("commit", ("co", "ci", "mo")), ("diff", ("di",)), ("purge", ("pu",)), ("history", ("hi",)), ) mode_allnames = set() mode_tolong = {} for name, syns in mode_spec: mode_allnames.add(name) mode_allnames.update(syns) mode_tolong[name] = name mode_tolong.update((s, name) for s in syns) known_editors = { "lokalize": report_msg_to_lokalize, } # Setup options and parse the command line. usage = _("@info command usage", "%(cmd)s MODE [OPTIONS] [PATHS...]", cmd="%prog") desc = _("@info command description", "Keep track of who, when, and how, has translated, modified, " "or reviewed messages in a collection of PO files.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2008, 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-a", "--select-ascription", metavar=_("@info command line value placeholder", "SELECTOR[:ARGS]"), action="append", dest="aselectors", default=None, help=_("@info command line option description", "Select a message from ascription history by this selector. " "Can be repeated, in which case the message is selected " "if all selectors match it.")) opars.add_option( "-A", "--min-adjsim-diff", metavar=_("@info command line value placeholder", "RATIO"), action="store", dest="min_adjsim_diff", default=None, help=_("@info command line option description", "Minimum adjusted similarity between two versions of a message " "needed to show the embedded difference. " "Range 0.0-1.0, where 0 means always to show the difference, " "and 1 never to show it; a convenient range is 0.6-0.8. 
" "When the difference is not shown, the '%(flag)s' flag is " "added to the message.", flag=_diffflag_ign)) opars.add_option( "-b", "--show-by-file", action="store_true", dest="show_by_file", default=False, help=_("@info command line option description", "Next to global summary, also present results by file.")) opars.add_option( "-C", "--no-vcs-commit", action="store_false", dest="vcs_commit", default=None, help=_("@info command line option description", "Do not commit catalogs to version control " "(when version control is used).")) opars.add_option( "-d", "--depth", metavar=_("@info command line value placeholder", "LEVEL"), action="store", dest="depth", default=None, help=_("@info command line option description", "Consider ascription history up to this level into the past.")) opars.add_option( "-D", "--diff-reduce-history", metavar=_("@info command line value placeholder", "SPEC"), action="store", dest="diff_reduce_history", default=None, help=_("@info command line option description", "Reduce each message in history to a part of the difference " "from the first earlier modification: to added, removed, or " "equal segments. " "The value begins with one of the characters 'a', 'r', or 'e', " "followed by substring that will be used to separate " "selected difference segments in resulting messages " "(if this substring is empty, space is used).")) opars.add_option( "-F", "--filter", metavar=_("@info command line value placeholder", "NAME"), action="append", dest="filters", default=None, help=_("@info command line option description", "Pass relevant message text fields through a filter before " "matching or comparing them (relevant in some modes). " "Can be repeated to add several filters.")) opars.add_option( "-G", "--show-filtered", action="store_true", dest="show_filtered", default=False, help=_("@info command line option description", "When operating under a filter, also show filtered versions " "of whatever is shown in original (e.g. in diffs).")) opars.add_option( "-k", "--keep-flags", action="store_true", dest="keep_flags", default=False, help=_("@info command line option description", "Do not remove review-significant flags from messages " "(possibly convert them as appropriate).")) opars.add_option( "-m", "--message", metavar=_("@info command line value placeholder", "TEXT"), action="store", dest="message", default=None, help=_("@info command line option description", "Version control commit message for original catalogs, " "when %(opt)s is in effect.", opt="-c")) opars.add_option( "-o", "--open-in-editor", metavar=("|".join(sorted(known_editors))), action="store", dest="po_editor", default=None, help=_("@info command line option description", "Open selected messages in one of the supported PO editors.")) opars.add_option( "-L", "--max-fraction-select", metavar=_("@info command line value placeholder", "FRACTION"), action="store", dest="max_fraction_select", default=None, help=_("@info command line option description", "Select messages in a catalog only if the total number " "of selected messages in that catalog would be at most " "the given fraction (0.0-1.0) of total number of messages.")) opars.add_option( "-s", "--selector", metavar=_("@info command line value placeholder", "SELECTOR[:ARGS]"), action="append", dest="selectors", default=None, help=_("@info command line option description", "Consider only messages matched by this selector. 
" "Can be repeated, in which case the message is selected " "if all selectors match it.")) opars.add_option( "-t", "--tag", metavar=_("@info command line value placeholder", "TAG"), action="store", dest="tags", default=None, help=_("@info command line option description", "Tag to add or consider in ascription records. " "Several tags may be given separated by commas.")) opars.add_option( "-u", "--user", metavar=_("@info command line value placeholder", "USER"), action="store", dest="user", default=None, help=_("@info command line option description", "User in whose name the operation is performed.")) opars.add_option( "-U", "--update-headers", action="store_true", dest="update_headers", default=None, help=_("@info command line option description", "Update headers in catalogs which contain modifications " "before committing them, with user's translator information.")) opars.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=False, help=_("@info command line option description", "Output more detailed progress info.")) opars.add_option( "-w", "--write-modified", metavar=_("@info command line value placeholder", "FILE"), action="store", dest="write_modified", default=None, help=_("@info command line option description", "Write paths of all original catalogs modified by " "ascription operations into the given file.")) opars.add_option( "-x", "--externals", metavar=_("@info command line value placeholder", "PYFILE"), action="append", dest="externals", default=[], help=_("@info command line option description", "Collect optional functionality from an external Python file " "(selectors, etc).")) opars.add_option( "--all-reviewed", action="store_true", dest="all_reviewed", default=False, help=_("@info command line option description", "Ascribe all messages as reviewed on commit, " "overriding any existing review elements. " "Tags given by %(opt)s apply. " "This should not be done in day-to-day practice; " "the primary use is initial review ascription.", opt="--tag")) add_cmdopt_filesfrom(opars) add_cmdopt_incexc(opars) (options, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:])) # Parse operation mode and its arguments. if len(free_args) < 1: error(_("@info", "Operation mode not given.")) rawmodename = free_args.pop(0) modename = mode_tolong.get(rawmodename) if modename is None: flatmodes = ["/".join((x[0],) + x[1]) for x in mode_spec] error(_("@info", "Unknown operation mode '%(mode)s' " "(known modes: %(modelist)s).", mode=rawmodename, modelist=format_item_list(flatmodes))) # For options not issued, read values from user configuration. # Configuration values can also be issued by mode using # C{afield/amode = value} syntax, which takes precedence over # general fields (e.g. C{filters/review} vs. C{filters}). 
cfgsec = pology_config.section("poascribe") for optname, getvalf, defval in ( ("aselectors", cfgsec.strdlist, []), ("vcs-commit", cfgsec.boolean, True), ("po-editor", cfgsec.string, None), ("filters", cfgsec.strslist, []), ("min-adjsim-diff", cfgsec.real, 0.0), ("selectors", cfgsec.strdlist, []), ("tags", cfgsec.string, ""), ("user", cfgsec.string, None), ("update-headers", cfgsec.boolean, False), ("diff-reduce-history", cfgsec.string, None), ("max-fraction-select", cfgsec.real, 1.01), ): uoptname = optname.replace("-", "_") if getattr(options, uoptname) is None: for fldname in ("%s/%s" % (optname, modename), optname): fldval = getvalf(fldname, None) if fldval is not None: break if fldval is None: fldval = defval setattr(options, uoptname, fldval) # Convert options to non-string types. def valconv_editor (edkey): msgrepf = known_editors.get(edkey) if msgrepf is None: error(_("@info", "PO editor '%(ed)s' is not among " "the supported editors: %(edlist)s.", ed=edkey, edlist=format_item_list(sorted(known_editors)))) return msgrepf def valconv_tags (cstr): return set(x.strip() for x in cstr.split(",")) for optname, valconv in ( ("max-fraction-select", float), ("min-adjsim-diff", float), ("po-editor", valconv_editor), ("tags", valconv_tags), ): uoptname = optname.replace("-", "_") valraw = getattr(options, uoptname, None) if valraw is not None: try: value = valconv(valraw) except TypeError: error(_("@info", "Value '%(val)s' to option '%(opt)s' is of wrong type.", val=valraw, opt=("--" + optname))) setattr(options, uoptname, value) # Collect any external functionality. for xmod_path in options.externals: collect_externals(xmod_path) # Create history filter if requested, store it in options. options.hfilter = None options.sfilter = None if options.filters: hfilters = [] for hspec in options.filters: hfilters.append(get_hook_ireq(hspec, abort=True)) def hfilter_composition (text): for hfilter in hfilters: text = hfilter(text) return text options.hfilter = hfilter_composition if options.show_filtered: options.sfilter = options.hfilter # Create specification for reducing historical messages to diffs. options.addrem = None if options.diff_reduce_history: options.addrem = options.diff_reduce_history if options.addrem[:1] not in ("a", "e", "r"): error(_("@info", "Value '%(val)s' to option '%(opt)s' must start " "with '%(char1)s', '%(char2)s', or '%(char3)s'.", val=options.addrem, opt="--diff-reduce-history", char1="a", char2="e", char3="r")) # Create selectors if any explicitly given. selector = None if options.selectors: selector = make_ascription_selector(options.selectors) aselector = None if options.aselectors: aselector = make_ascription_selector(options.aselectors, hist=True) # Assemble operation mode. 
needuser = False canselect = False canaselect = False class _Mode: pass mode = _Mode() mode.name = modename if 0: pass elif mode.name == "status": mode.execute = status mode.selector = selector or make_ascription_selector(["any"]) canselect = True elif mode.name == "commit": mode.execute = commit mode.selector = selector or make_ascription_selector(["any"]) needuser = True canselect = True elif mode.name == "diff": mode.execute = diff mode.selector = selector or make_ascription_selector(["modar"]) mode.aselector = aselector canselect = True canaselect = True elif mode.name == "purge": mode.execute = purge mode.selector = selector or make_ascription_selector(["any"]) canselect = True elif mode.name == "history": mode.execute = history mode.selector = selector or make_ascription_selector(["any"]) canselect = True else: error(_("@info", "Unhandled operation mode '%(mode)s'.", mode=mode.name)) mode.user = None if needuser: if not options.user: error(_("@info", "Operation mode '%(mode)s' requires a user " "to be specified.", mode=mode.name)) mode.user = options.user if not canselect and selector: error(_("@info", "Operation mode '%(mode)s' does not accept selectors.", mode=mode.name)) if not canaselect and aselector: error(_("@info", "Operation mode '%(mode)s' does not accept history selectors.", mode=mode.name)) # Collect list of catalogs supplied through command line. # If none supplied, assume current working directory. catpaths = collect_paths_cmdline(rawpaths=free_args, incnames=options.include_names, incpaths=options.include_paths, excnames=options.exclude_names, excpaths=options.exclude_paths, filesfrom=options.files_from, elsecwd=True, respathf=collect_catalogs, abort=True) # Split catalogs into lists by ascription config, # and link them to their ascription catalogs. aconfs_catpaths = collect_ascription_associations(catpaths) assert_review_tags(aconfs_catpaths, options.tags) # Execute operation. mode.execute(options, aconfs_catpaths, mode) # Write out list of modified original catalogs if requested. if options.write_modified and _modified_cats: lfpath = options.write_modified f = open(lfpath, "w") f.write(("\n".join(sorted(_modified_cats)) + "\n").encode("utf-8")) f.close() report(_("@info", "Paths of modified catalogs written to '%(file)s'.", file=lfpath)) def vcs_commit_catalogs (aconfs_catpaths, user, message=None, onabortf=None): report(_("@info:progress VCS is acronym for \"version control system\"", ">>>>> VCS is committing catalogs:")) # Attach paths to each distinct config, to commit them all at once. aconfs = [] catpaths_byconf = {} for aconf, catpaths in aconfs_catpaths: if aconf not in catpaths_byconf: catpaths_byconf[aconf] = [] aconfs.append(aconf) for catpath, acatpath in catpaths: catpaths_byconf[aconf].append(catpath) if os.path.isfile(acatpath): catpaths_byconf[aconf].append(acatpath) # Commit by config. 
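    # Editor's sketch (not in the original): at this point catpaths_byconf
    # maps each distinct ascription config to all paths going into its
    # commit, original catalogs plus their existing ascription catalogs,
    # e.g. (hypothetical paths):
    #
    #   catpaths_byconf = {
    #       aconf: ["po/foo.po", "po-ascript/foo.po", "po/bar.po"],
    #   }
    #
    # so that one VCS commit is made per config in the loop below.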
    for aconf in aconfs:
        cmsg = message
        cmsgfile = None
        if not cmsg:
            cmsg = aconf.commitmsg
        if not cmsg:
            cmsgfile, cmsgfile_orig = get_commit_message_file_path(user)
        else:
            cmsg += " " + fmt_commit_user(user)
        added, apaths = aconf.vcs.add(catpaths_byconf[aconf], repadd=True)
        if not added:
            if onabortf:
                onabortf()
            error(_("@info",
                    "VCS reports that some catalogs cannot be added."))
        cpaths = sorted(set(map(join_ncwd, catpaths_byconf[aconf] + apaths)))
        if not aconf.vcs.commit(cpaths, message=cmsg, msgfile=cmsgfile,
                                incparents=False):
            if onabortf:
                onabortf()
            if not cmsgfile:
                error(_("@info",
                        "VCS reports that some catalogs cannot be committed."))
            else:
                os.remove(cmsgfile)
                error(_("@info",
                        "VCS reports that some catalogs cannot be committed "
                        "(commit message preserved in '%(file)s').",
                        file=cmsgfile_orig))
        if cmsgfile:
            os.remove(cmsgfile)
            os.remove(cmsgfile_orig)


def fmt_commit_user (user):

    return "[>%s]" % user


def get_commit_message_file_path (user):

    while True:
        tfmt = time.strftime("%Y-%m-%d-%H-%M-%S")
        prefix = "poascribe-commit-message"
        ext = "txt"
        fpath = "%s-%s.%s" % (prefix, tfmt, ext)
        fpath_asc = "%s-%s-asc.%s" % (prefix, tfmt, ext)
        if not os.path.isfile(fpath) and not os.path.isfile(fpath_asc):
            break

    edcmd = None
    if not edcmd:
        edcmd = os.getenv("ASC_EDITOR")
    if not edcmd:
        edcmd = pology_config.section("poascribe").string("editor")
    if not edcmd:
        edcmd = os.getenv("EDITOR")
    if not edcmd:
        edcmd = "/usr/bin/vi"

    cmd = "%s %s" % (edcmd, fpath)
    if os.system(cmd):
        error(_("@info",
                "Error from editor command '%(cmd)s' for commit message.",
                cmd=cmd))
    if not os.path.isfile(fpath):
        error(_("@info",
                "Editor command '%(cmd)s' did not produce a file.",
                cmd=cmd))

    cmsg = open(fpath, "r").read()
    if not cmsg.endswith("\n"):
        cmsg += "\n"
    fmt_user = unicode_to_str(fmt_commit_user(user))
    if cmsg.count("\n") == 1:
        cmsg = cmsg[:-1] + " " + fmt_user + "\n"
    else:
        cmsg += fmt_user + "\n"
    fh = open(fpath_asc, "w")
    fh.write(cmsg)
    fh.close()

    return fpath_asc, fpath


def assert_mode_user (aconfs_catpaths, mode):

    for aconf, catpaths in aconfs_catpaths:
        if mode.user not in aconf.users:
            error(_("@info",
                    "User '%(user)s' not defined in '%(file)s'.",
                    user=mode.user, file=aconf.path))


def assert_review_tags (aconfs_catpaths, tags):

    for aconf, catpaths in aconfs_catpaths:
        for tag in tags:
            if tag not in aconf.revtags:
                error(_("@info",
                        "Review tag '%(tag)s' not defined in '%(file)s'.",
                        tag=tag, file=aconf.path))


def assert_syntax (aconfs_catpaths, onabortf=None):

    checkf = msgfmt(options=["--check"])
    numerr = 0
    for aconf, catpaths in aconfs_catpaths:
        for catpath, acatpath in catpaths:
            numerr += checkf(catpath)
    if numerr:
        if onabortf:
            onabortf()
        error(_("@info",
                "Invalid syntax in some files, see the reports above. "
                "Ascription aborted."))
    return numerr


def setup_progress (aconfs_catpaths, addfmt):

    acps = [y[0] for x in aconfs_catpaths for y in x[1]]
    return init_file_progress(acps, addfmt=addfmt)


# Exclusive states of a message, as reported by Message.state().
_st_tran = "T"
_st_fuzzy = "F"
_st_untran = "U"
_st_otran = "OT"
_st_ofuzzy = "OF"
_st_ountran = "OU"
_all_states = (
    _st_tran, _st_fuzzy, _st_untran,
    _st_otran, _st_ofuzzy, _st_ountran,
)


def status (options, aconfs_catpaths, mode):

    # Count ascribed and unascribed messages through catalogs.
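    # Editor's sketch (not in the original): the two tables built below have
    # the form {state: {catpath: count}}, keyed by the exclusive states
    # defined above, e.g. after one catalog (hypothetical numbers):
    #
    #   counts_a  = {"T": {"po/foo.po": 120}, "F": {"po/foo.po": 7}, ...}
    #   counts_na = {"T": {"po/foo.po": 3}, ...}
    #
    # Totals per state are later obtained by summing the inner dictionaries.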
counts_a = dict([(x, {}) for x in _all_states]) counts_na = dict([(x, {}) for x in _all_states]) upprog = setup_progress(aconfs_catpaths, t_("@info:progress", "Examining state: %(file)s")) for aconf, catpaths in aconfs_catpaths: for catpath, acatpath in catpaths: upprog(catpath) # Open current and ascription catalog. cat = Catalog(catpath, monitored=False) acat = Catalog(acatpath, create=True, monitored=False) # Count ascribed and non-ascribed by original catalog. nselected = 0 for msg in cat: purge_msg(msg) ahist = collect_ascription_history( msg, acat, aconf, hfilter=options.hfilter, addrem=options.addrem, nomrg=True) if ahist[0].user is None and not has_tracked_parts(msg): continue # pristine if not mode.selector(msg, cat, ahist, aconf): continue # not selected counts = ahist[0].user is None and counts_na or counts_a st = msg.state() if catpath not in counts[st]: counts[st][catpath] = 0 counts[st][catpath] += 1 nselected += 1 # Count non-ascribed by ascription catalog. for amsg in acat: if amsg not in cat: ast = amsg.state() st = None if ast == _st_tran: st = _st_otran elif ast == _st_fuzzy: st = _st_ofuzzy elif ast == _st_untran: st = _st_ountran if st: if catpath not in counts_na[st]: counts_na[st][catpath] = 0 counts_na[st][catpath] += 1 # Cancel counts if maximum selection fraction exceeded. if float(nselected) / len(cat) > options.max_fraction_select: for counts in (counts_a, counts_na): for st in _all_states: if catpath in counts[st]: counts[st].pop(catpath) upprog() # Some general data for tabulation of output. coln = [_("@title:column translated messages", "msg/t"), _("@title:column fuzzy messages", "msg/f"), _("@title:column untranslated messages", "msg/u"), _("@title:column obsolete translated messages", "msg/ot"), _("@title:column obsolete fuzzy messages", "msg/of"), _("@title:column obsolete untranslated messages", "msg/ou")] none="-" # NOTE: When reporting, do not show anything if there are # neither ascribed nor non-ascribed messages selected. # If there are some ascribed and none non-ascribed, # show only the row for ascribed. # However, if there are some non-ascribed but none ascribed, # still show the row for ascribed, to not accidentally confuse # non-ascribed for ascribed. # Report totals. totals_a, totals_na = {}, {} for totals, counts in ((totals_a, counts_a), (totals_na, counts_na)): for st, cnt_per_cat in counts.items(): totals[st] = sum(cnt_per_cat.values()) # See previous NOTE. if sum(totals_a.values()) > 0 or sum(totals_na.values()) > 0: rown = [_("@title:row number of ascribed messages", "ascribed")] data = [[totals_a[x] or None] for x in _all_states] if sum(totals_na.values()) > 0: rown.append(_("@title:row number of unascribed messages", "unascribed")) for i in range(len(_all_states)): data[i].append(totals_na[_all_states[i]] or None) report(tabulate(data=data, coln=coln, rown=rown, none=none, colorize=True)) # Report counts per catalog if requested. if options.show_by_file: catpaths = set() for counts in (counts_a, counts_na): catpaths.update(sum([x.keys() for x in counts.values()], [])) catpaths = sorted(catpaths) if catpaths: coln.insert(0, _("@title:column", "catalog")) coln.insert(1, _("@title:column state (asc/nasc)", "st")) data = [[] for x in _all_states] for catpath in catpaths: cc_a = [counts_a[x].get(catpath, 0) for x in _all_states] cc_na = [counts_na[x].get(catpath, 0) for x in _all_states] # See previous NOTE. 
if sum(cc_a) > 0 or sum(cc_na) > 0: data[0].append(catpath) data[1].append( _("@item:intable number of ascribed messages", "asc")) for datac, cc in zip(data[2:], cc_a): datac.append(cc or None) if sum(cc_na) > 0: data[0].append("^^^") data[1].append( _("@item:intable number of unascribed messages", "nasc")) for datac, cc in zip(data[2:], cc_na): datac.append(cc or None) if any(data): dfmt = ["%%-%ds" % max([len(x) for x in catpaths])] report("-") report(tabulate(data=data, coln=coln, dfmt=dfmt, none=none, colorize=True)) # FIXME: Factor out into message module. _fields_current = ( "msgctxt", "msgid", "msgid_plural", ) _fields_previous = ( "msgctxt_previous", "msgid_previous", "msgid_plural_previous", ) def msg_to_previous (msg, copy=True): if msg.fuzzy and msg.msgid_previous is not None: pmsg = MessageUnsafe(msg) if copy else msg for fcurr, fprev in zip(_fields_current, _fields_previous): setattr(pmsg, fcurr, pmsg.get(fprev)) pmsg.unfuzzy() return pmsg def restore_reviews (aconfs_catpaths, revspecs_by_catmsg): upprog = setup_progress(aconfs_catpaths, t_("@info:progress", "Restoring reviews: %(file)s")) nrestored = 0 for aconf, catpaths in aconfs_catpaths: for catpath, acatpath in catpaths: upprog(catpath) revels_by_msg = revspecs_by_catmsg.get(catpath) if revels_by_msg: cat = Catalog(catpath, monitored=True) for msgref, revels in sorted(revels_by_msg.items()): msg = cat[msgref - 1] revtags, unrevd, revok = revels restore_review_flags(msg, revtags, unrevd) nrestored += 1 sync_and_rep(cat, shownmod=False) if aconf.vcs.is_versioned(acatpath): aconf.vcs.revert(acatpath) # ...no else: because revert may cause the file # not to be versioned any more. if not aconf.vcs.is_versioned(acatpath): os.remove(acatpath) if nrestored > 0: report(n_("@info:progress", "===== Review elements restored to %(num)d message.", "===== Review elements restored to %(num)d messages.", num=nrestored)) def restore_review_flags (msg, revtags, unrevd): for tag in revtags: flag = _revdflags[0] if tag: flag += _flagtagsep + tag msg.flag.add(flag) if unrevd: msg.flag.add(_urevdflags[0]) return msg def commit (options, aconfs_catpaths, mode): assert_mode_user(aconfs_catpaths, mode) # Ascribe modifications and reviews. upprog = setup_progress(aconfs_catpaths, t_("@info:progress", "Ascribing: %(file)s")) revels = {} counts = dict([(x, [0, 0]) for x in _all_states]) aconfs_catpaths_ascmod = [] aconf_by_catpath = {} for aconf, catpaths in aconfs_catpaths: aconfs_catpaths_ascmod.append((aconf, [])) for catpath, acatpath in catpaths: upprog(catpath) res = commit_cat(options, aconf, mode.user, catpath, acatpath, mode.selector) ccounts, crevels, catmod = res for st, (nmod, nrev) in ccounts.items(): counts[st][0] += nmod counts[st][1] += nrev revels[catpath] = crevels if catmod: aconfs_catpaths_ascmod[-1][1].append((catpath, acatpath)) aconf_by_catpath[catpath] = aconf upprog() onabortf = lambda: restore_reviews(aconfs_catpaths_ascmod, revels) # Assert that all reviews were good. 
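    # Editor's sketch (not in the original): 'revels' maps catalog path to
    # {message entry number: [revtags, unrevd, revok]}, where revok is set
    # to False by commit_cat() for unknown review tags, e.g. (hypothetical
    # path, entry number and tag):
    #
    #   revels = {"po/foo.po": {42: [set(["gui"]), False, True]}}
    #
    # Any revok == False below aborts the ascription and restores the
    # review elements.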
unknown_revtags = [] for catpath, revels1 in sorted(revels.items()): aconf = aconf_by_catpath[catpath] for msgref, (revtags, unrevd, revok) in sorted(revels1.items()): if not revok: onabortf() error("Ascription aborted due to earlier warnings.") assert_syntax(aconfs_catpaths_ascmod, onabortf=onabortf) # ...must be done after committing, to have all review elements purged coln = [_("@title:column number of modified messages", "modified")] rown = [] data = [[]] for st, stlabel in ( (_st_tran, _("@title:row number of translated messages", "translated")), (_st_fuzzy, _("@title:row number of fuzzy messages", "fuzzy")), (_st_untran, _("@title:row number of untranslated messages", "untranslated")), (_st_otran, _("@title:row number of obsolete translated messages", "obsolete/t")), (_st_ofuzzy, _("@title:row number of obsolete fuzzy messages", "obsolete/f")), (_st_ountran, _("@title:row number of obsolete untranslated messages", "obsolete/u")), ): if counts[st][1] > 0 and len(coln) < 2: coln.append(_("@title:column number of reviewed messages", "reviewed")) data.append([]) if counts[st][0] > 0 or counts[st][1] > 0: rown.append(stlabel) data[0].append(counts[st][0] or None) if len(coln) >= 2: data[1].append(counts[st][1] or None) if rown: report(_("@info:progress", "===== Ascription summary:")) report(tabulate(data, coln=coln, rown=rown, none="-", colorize=True)) if options.vcs_commit: vcs_commit_catalogs(aconfs_catpaths, mode.user, message=options.message, onabortf=onabortf) # ...not configs_catpaths_ascmod, as non-ascription relevant # modifications may exist (e.g. new pristine catalog added). def diff (options, aconfs_catpaths, mode): upprog = setup_progress(aconfs_catpaths, t_("@info:progress", "Diffing for review: %(file)s")) ndiffed = 0 for aconf, catpaths in aconfs_catpaths: for catpath, acatpath in catpaths: upprog(catpath) ndiffed += diff_cat(options, aconf, catpath, acatpath, mode.selector, mode.aselector) upprog() if ndiffed > 0: report(n_("@info:progress", "===== %(num)d message diffed for review.", "===== %(num)d messages diffed for review.", num=ndiffed)) def purge (options, aconfs_catpaths, mode): upprog = setup_progress(aconfs_catpaths, t_("@info:progress", "Purging review elements: %(file)s")) npurged = 0 for aconf, catpaths in aconfs_catpaths: for catpath, acatpath in catpaths: upprog(catpath) npurged += purge_cat(options, aconf, catpath, acatpath, mode.selector) upprog() if npurged > 0: if not options.keep_flags: report(n_("@info:progress", "===== Review elements purged from %(num)d message.", "===== Review elements purged from %(num)d messages.", num=npurged)) else: report(n_("@info:progress", "===== Review elements purged from %(num)d message " "(flags kept).", "===== Review elements purged from %(num)d messages " "(flags kept).", num=npurged)) return npurged def history (options, aconfs_catpaths, mode): upprog = setup_progress(aconfs_catpaths, t_("@info:progress", "Computing histories: %(file)s")) nshown = 0 for aconf, catpaths in aconfs_catpaths: for catpath, acatpath in catpaths: upprog(catpath) nshown += history_cat(options, aconf, catpath, acatpath, mode.selector) upprog() if nshown > 0: report(n_("@info:progress", "===== Histories computed for %(num)d message.", "===== Histories computed for %(num)d messages.", num=nshown)) def commit_cat (options, aconf, user, catpath, acatpath, stest): # Open current catalog and ascription catalog. # Monitored, for removal of review elements. 
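    # Editor's note (assumption based on usage elsewhere in this script): a
    # monitored catalog tracks per-message modification counts, which lets
    # sync_and_rep() report how many messages were touched and lets
    # cat.sync() write the file only when something actually changed, e.g.:
    #
    #   cat = Catalog("po/foo.po", monitored=True)  # hypothetical path
    #   ...modify some messages...
    #   modified = cat.sync()  # True only if some message changed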
cat = Catalog(catpath, monitored=True) acat = prep_write_asc_cat(acatpath, aconf) revtags_ovr = None if options.all_reviewed: revtags_ovr = options.tags # Collect unascribed messages, but ignoring pristine ones # (those which are both untranslated and without history). # Collect and purge any review elements. # Check if any modification cannot be due to merging # (if header update is requested). mod_msgs = [] rev_msgs = [] revels_by_msg = {} counts = dict([(x, [0, 0]) for x in _all_states]) counts0 = counts.copy() any_nonmerges = False prev_msgs = [] check_mid_msgs = [] for msg in cat: mod, revtags, unrevd = purge_msg(msg) if mod: revels_by_msg[msg.refentry] = [revtags, unrevd, True] ahist = collect_ascription_history(msg, acat, aconf) # after purging # Do not ascribe anything if the message is new and untranslated. if ( ahist[0].user is None and len(ahist) == 1 and not has_tracked_parts(msg) ): continue # Possibly ascribe review only if the message passes the selector. if stest(msg, cat, ahist, aconf) and (mod or revtags_ovr): if revtags_ovr: revtags = revtags_ovr unrevd = False if revtags and not unrevd: # unreviewed flag overrides rev_msgs.append((msg, revtags)) counts[msg.state()][1] += 1 # Check and record if review tags are not valid. unknown_revtags = revtags.difference(aconf.revtags) if unknown_revtags: revels_by_msg[msg.refentry][-1] = False tagfmt = format_item_list(sorted(unknown_revtags)) warning_on_msg(_("@info", "Unknown review tags: %(taglist)s.", taglist=tagfmt), msg, cat) # Ascribe modification regardless of the selector. if ahist[0].user is None: mod_msgs.append(msg) counts[msg.state()][0] += 1 if options.update_headers and not any_nonmerges: if len(ahist) == 1 or not merge_modified(ahist[1].msg, msg): any_nonmerges = True # Record that reconstruction of the post-merge message # should be tried if this message has no prior history # but it is not pristine (it may be that the translator # has merged the catalog and updated fuzzy messages in one step, # without committing the catalog right after merging). if len(ahist) == 1: check_mid_msgs.append(msg) # Collect latest historical version of the message, # in case reconstruction of post-merge messages is needed. if ahist[0].user is not None or len(ahist) > 1: pmsg = ahist[1 if ahist[0].user is None else 0].msg prev_msgs.append(pmsg) # Collect non-obsolete ascribed messages that no longer have # original counterpart, to ascribe as obsolete. # If reconstruction of post-merge messages is needed, # also collect latest historical versions. cat.sync_map() # in case key fields were purged for amsg in acat: if amsg not in cat: ast = amsg.state() st = None if ast == _st_tran: st = _st_otran elif ast == _st_fuzzy: st = _st_ofuzzy elif ast == _st_untran: st = _st_ountran if st or check_mid_msgs: msg = collect_ascription_history_segment(amsg, acat, aconf)[0].msg if check_mid_msgs: prev_msgs.append(msg) if st: msg.obsolete = True mod_msgs.append(msg) counts[st][0] += 1 # Shortcut if nothing to do, because sync_and_rep later are expensive. if not mod_msgs and not revels_by_msg: # No messages to commit. return counts0, revels_by_msg, False # Construct post-merge messages. mod_mid_msgs = [] if check_mid_msgs and not acat.created(): mid_cat = create_post_merge_cat(cat, prev_msgs) for msg in check_mid_msgs: mid_msg = mid_cat.get(msg) if ( mid_msg is not None and mid_msg.fuzzy and not ascription_equal(mid_msg, msg) ): mod_mid_msgs.append(mid_msg) # Ascribe modifications. 
    for mid_msg in mod_mid_msgs: # ascribe post-merge before actual
        ascribe_modification(mid_msg, user, _dt_start, acat, aconf)
    for msg in mod_msgs:
        ascribe_modification(msg, user, _dt_start, acat, aconf)

    # Ascribe reviews.
    for msg, revtags in rev_msgs:
        ascribe_review(msg, user, _dt_start, revtags, acat, aconf)

    # Update header if requested and translator's modifications detected.
    if options.update_headers and any_nonmerges:
        cat.update_header(project=cat.name,
                          title=aconf.title,
                          name=aconf.users[user].name,
                          email=aconf.users[user].email,
                          teamemail=aconf.teamemail,
                          langname=aconf.langteam,
                          langcode=aconf.langcode,
                          plforms=aconf.plforms)

    nmod = [len(mod_msgs)]
    if len(rev_msgs) > 0:
        nmod.append(len(rev_msgs))
    catmod = False
    if sync_and_rep(cat, nmod=nmod):
        catmod = True
    if asc_sync_and_rep(acat, shownmod=False, nmod=[0]):
        catmod = True

    return counts, revels_by_msg, catmod


def diff_cat (options, aconf, catpath, acatpath, stest, aselect):

    cat = Catalog(catpath, monitored=True)
    acat = Catalog(acatpath, create=True, monitored=False)

    # Select messages for diffing.
    msgs_to_diff = []
    for msg in cat:
        purge_msg(msg)
        ahist = collect_ascription_history(
            msg, acat, aconf,
            hfilter=options.hfilter, addrem=options.addrem, nomrg=True)
        # Makes no sense to review pristine messages.
        if ahist[0].user is None and not has_tracked_parts(msg):
            continue
        sres = stest(msg, cat, ahist, aconf)
        if not sres:
            continue
        msgs_to_diff.append((msg, ahist, sres))

    # Cancel selection if maximum fraction exceeded.
    if float(len(msgs_to_diff)) / len(cat) > options.max_fraction_select:
        msgs_to_diff = []

    if not msgs_to_diff:
        return 0

    # Diff selected messages.
    diffed_msgs = []
    tagfmt = _flagtagsep.join(options.tags)
    for msg, ahist, sres in msgs_to_diff:

        # Try to select ascription to differentiate from.
        # (Note that ascription indices returned by selectors are 1-based.)
        i_asc = None
        if aselect:
            asres = aselect(msg, cat, ahist, aconf)
            i_asc = (asres - 1) if asres else None
        elif not isinstance(sres, bool):
            # If there is no ascription selector, but basic selector returned
            # an ascription index, use first earlier non-fuzzy for diffing.
            i_asc = sres - 1
            i_asc = first_non_fuzzy(ahist, i_asc + 1)

        # Differentiate and flag.
        amsg = i_asc is not None and ahist[i_asc].msg or None
        if amsg is not None:
            if editprob(amsg.msgid, msg.msgid) > options.min_adjsim_diff:
                msg_ediff(amsg, msg, emsg=msg, pfilter=options.sfilter)
                flag = _diffflag
            else:
                # If the difference is too great, add special flag
                # and do not diff.
                flag = _diffflag_ign
        else:
            # If no previous ascription selected, add special flag.
            flag = _diffflag_tot
        if tagfmt:
            flag += _flagtagsep + tagfmt
        msg.flag.add(flag)

        # Add ascription chain comment.
        ascfmts = []
        i_from = (i_asc - 1) if i_asc is not None else len(ahist) - 1
        for i in range(i_from, -1, -1):
            a = ahist[i]
            shtype = {AscPoint.ATYPE_MOD: "m", AscPoint.ATYPE_REV: "r"}[a.type]
            if a.tag:
                ascfmt = "%s:%s(%s)" % (a.user, shtype, a.tag)
            else:
                ascfmt = "%s:%s" % (a.user, shtype)
            ascfmts.append(ascfmt)
        achnfmt = u"%s %s" % (_achncmnt, " ".join(ascfmts))
        msg.auto_comment.append(achnfmt)

        diffed_msgs.append(msg)

    sync_and_rep(cat)
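    # Editor's illustration (not in the original): a message selected above
    # ends up carrying a review flag and the ascription chain comment, e.g.
    # for tag 'gui' and a hypothetical history of a modification by alice
    # followed by a review by bob:
    #
    #   #. ~ascto: alice:m bob:r(gui)
    #   #, ediff/gui
    #
    # ('ediff-total/gui' when there was no earlier version to diff against,
    # 'ediff-ignored/gui' when the difference was too great to embed).

    # Open in the PO editor if requested.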
    if options.po_editor:
        for msg in diffed_msgs:
            options.po_editor(msg, cat,
                              report=_("@info note on selected message",
                                       "Selected for review."))

    return len(diffed_msgs)


_subreflags = "|".join(_all_flags)
_subrecmnts = "|".join(_all_cmnts)
_any_to_purge_rx = re.compile(r"^\s*(#,.*\b(%s)|#\.\s*(%s))"
                              % (_subreflags, _subrecmnts),
                              re.M|re.U)

# Quickly check if it may be that some messages in the PO file
# have review elements (diffs, flags).
def may_have_revels (catpath):

    return bool(_any_to_purge_rx.search(open(catpath).read()))


def purge_cat (options, aconf, catpath, acatpath, stest):

    if not may_have_revels(catpath):
        return 0

    cat = Catalog(catpath, monitored=True)
    acat = Catalog(acatpath, create=True, monitored=False)

    # Select messages to purge.
    msgs_to_purge = []
    for msg in cat:
        cmsg = MessageUnsafe(msg)
        purge_msg(cmsg)
        ahist = collect_ascription_history(
            cmsg, acat, aconf,
            hfilter=options.hfilter, addrem=options.addrem, nomrg=True)
        if not stest(cmsg, cat, ahist, aconf):
            continue
        msgs_to_purge.append(msg)

    # Does observing options.max_fraction_select make sense for purging?
    ## Cancel selection if maximum fraction exceeded.
    #if float(len(msgs_to_purge)) / len(cat) > options.max_fraction_select:
        #msgs_to_purge = []

    # Purge selected messages.
    npurged = 0
    for msg in msgs_to_purge:
        res = purge_msg(msg, keepflags=options.keep_flags)
        mod, revtags, unrevd = res
        if mod:
            npurged += 1

    sync_and_rep(cat)

    return npurged


def history_cat (options, aconf, catpath, acatpath, stest):

    cat = Catalog(catpath, monitored=False)
    acat = Catalog(acatpath, create=True, monitored=False)

    # Select messages for which to compute histories.
    msgs_to_hist = []
    for msg in cat:
        purge_msg(msg)
        ahist = collect_ascription_history(
            msg, acat, aconf,
            hfilter=options.hfilter, addrem=options.addrem, nomrg=True)
        if not stest(msg, cat, ahist, aconf):
            continue
        msgs_to_hist.append((msg, ahist))

    # Cancel selection if maximum fraction exceeded.
    if float(len(msgs_to_hist)) / len(cat) > options.max_fraction_select:
        msgs_to_hist = []

    # Compute histories for selected messages.
    for msg, ahist in msgs_to_hist:

        unasc = ahist[0].user is None
        if unasc:
            ahist.pop(0)

        hlevels = len(ahist)
        if options.depth is not None:
            hlevels = int(options.depth)
            if ahist[0].user is None:
                hlevels += 1
            if hlevels > len(ahist):
                hlevels = len(ahist)

        hinfo = []
        if hlevels > 0:
            hinfo += [_("@info:progress",
                        ">>> History follows:")]
            hfmt = "%%%dd" % len(str(hlevels))
        for i in range(hlevels):
            a = ahist[i]
            if a.type == AscPoint.ATYPE_MOD:
                anote = _("@item:intable",
                          "#%(pos)d "
                          "modified by %(user)s on %(date)s",
                          pos=a.pos, user=a.user, date=a.date)
            elif a.type == AscPoint.ATYPE_REV:
                if not a.tag:
                    anote = _("@item:intable",
                              "#%(pos)d "
                              "reviewed by %(user)s on %(date)s",
                              pos=a.pos, user=a.user, date=a.date)
                else:
                    anote = _("@item:intable",
                              "#%(pos)d "
                              "reviewed (%(tag)s) by %(user)s on %(date)s",
                              pos=a.pos, user=a.user, tag=a.tag, date=a.date)
            else:
                warning_on_msg(
                    _("@info",
                      "Unknown ascription type '%(type)s' found in history.",
                      type=a.type), msg, cat)
                continue
            hinfo += [anote]
            if not a.type == AscPoint.ATYPE_MOD:
                # Nothing more to show if this ascription is not a modification.
                continue
            i_next = i + 1
            if i_next == len(ahist):
                # Nothing more to show at end of history.
continue dmsg = MessageUnsafe(a.msg) nmsg = ahist[i_next].msg if dmsg != nmsg: msg_ediff(nmsg, dmsg, emsg=dmsg, pfilter=options.sfilter, colorize=True) dmsgfmt = dmsg.to_string(force=True, wrapf=cat.wrapf()).rstrip("\n") hindent = " " * (len(hfmt % 0) + 2) hinfo += [hindent + x for x in dmsgfmt.split("\n")] hinfo = cjoin(hinfo, "\n") if unasc or msg.fuzzy: pmsg = None i_nfasc = first_non_fuzzy(ahist) if i_nfasc is not None: pmsg = ahist[i_nfasc].msg elif msg.fuzzy and msg.msgid_previous is not None: pmsg = msg_to_previous(msg) if pmsg is not None: for fprev in _fields_previous: setattr(msg, fprev, None) msg_ediff(pmsg, msg, emsg=msg, pfilter=options.sfilter, colorize=True) report_msg_content(msg, cat, note=(hinfo or None), delim=("-" * 20)) return len(msgs_to_hist) _revflags_rx = re.compile(r"^(%s)(?: */(.*))?" % "|".join(_all_flags), re.I) def purge_msg (msg, keepflags=False): modified = False # Remove review flags. diffed = False revtags = set() unrevd = False for flag in list(msg.flag): # modified inside m = _revflags_rx.search(flag) if m: sflag = m.group(1) tagstr = m.group(2) or "" tags = [x.strip() for x in tagstr.split(_flagtagsep)] if sflag not in _urevdflags: revtags.update(tags) if sflag == _diffflag: # ...must not check ...in _diffflags because with # those other flags there is actually no diff. diffed = True else: unrevd = True msg.flag.remove(flag) modified = True # Remove review comments. i = 0 while i < len(msg.auto_comment): cmnt = msg.auto_comment[i].strip() if cmnt.startswith(_all_cmnts): msg.auto_comment.pop(i) modified = True else: i += 1 # Remove any leftover previous fields. if msg.translated: for fprev in _fields_previous: if msg.get(fprev) is not None: setattr(msg, fprev, None) modified = True if diffed: msg_ediff_to_new(msg, rmsg=msg) if keepflags: restore_review_flags(msg, revtags, unrevd) return modified, revtags, unrevd def prep_write_asc_cat (acatpath, aconf): if not os.path.isfile(acatpath): return init_asc_cat(acatpath, aconf) else: return Catalog(acatpath, monitored=True, wrapping=_ascwrapping) def init_asc_cat (acatpath, aconf): acat = Catalog(acatpath, create=True, monitored=True, wrapping=_ascwrapping) ahdr = acat.header ahdr.title = Monlist([u"Ascription shadow for %s.po" % acat.name]) translator = u"Ascriber" if aconf.teamemail: author = u"%s <%s>" % (translator, aconf.teamemail) else: author = u"%s" % translator ahdr.author = Monlist([author]) ahdr.copyright = u"Copyright same as for the original catalog." ahdr.license = u"License same as for the original catalog." 
ahdr.comment = Monlist([u"===== DO NOT EDIT MANUALLY ====="]) ahdr.set_field(u"Project-Id-Version", unicode(acat.name)) ahdr.set_field(u"Report-Msgid-Bugs-To", unicode(aconf.teamemail or "")) ahdr.set_field(u"PO-Revision-Date", format_datetime(_dt_start)) ahdr.set_field(u"Content-Type", u"text/plain; charset=UTF-8") ahdr.set_field(u"Content-Transfer-Encoding", u"8bit") if aconf.teamemail: ltr = "%s <%s>" % (translator, aconf.teamemail) else: ltr = translator ahdr.set_field(u"Last-Translator", unicode(ltr)) if aconf.langteam: if aconf.teamemail: tline = u"%s <%s>" % (aconf.langteam, aconf.teamemail) else: tline = aconf.langteam ahdr.set_field(u"Language-Team", unicode(tline)) else: ahdr.remove_field("Language-Team") if aconf.langcode: ahdr.set_field(u"Language", unicode(aconf.langcode)) else: ahdr.remove_field("Language") if aconf.plforms: ahdr.set_field(u"Plural-Forms", unicode(aconf.plforms)) else: ahdr.remove_field(u"Plural-Forms") return acat def update_asc_hdr (acat): acat.header.set_field(u"PO-Revision-Date", format_datetime(_dt_start)) def create_post_merge_cat (cat, prev_msgs): # Prepare previous catalog based on ascription catalog. prev_cat = Catalog("", create=True, monitored=False) prev_cat.header = Header(cat.header) for prev_msg in prev_msgs: prev_cat.add_last(prev_msg) tmpf1 = NamedTemporaryFile(prefix="pology-merged-", suffix=".po") prev_cat.filename = tmpf1.name prev_cat.sync() # Prepare template based on current catalog. tmpl_cat = Catalog("", create=True, monitored=False) tmpl_cat.header = Header(cat.header) for msg in cat: if not msg.obsolete: tmpl_msg = MessageUnsafe(msg) tmpl_msg.clear() tmpl_cat.add_last(tmpl_msg) tmpf2 = NamedTemporaryFile(prefix="pology-template-", suffix=".pot") tmpl_cat.filename = tmpf2.name tmpl_cat.sync() # Merge previous catalog using current catalog as template. mid_cat = merge_pofile(prev_cat.filename, tmpl_cat.filename, getcat=True, monitored=False, quiet=True) return mid_cat _modified_cats = [] def sync_and_rep (cat, shownmod=True, nmod=None): if shownmod and nmod is None: nmod = [0] for msg in cat: if msg.modcount: nmod[0] += 1 modified = cat.sync() if nmod and sum(nmod) > 0: # DO NOT check instead modified == True if shownmod: nmodfmt = "/".join("%d" % x for x in nmod) report("%s (%s)" % (cat.filename, nmodfmt)) else: report("%s" % cat.filename) _modified_cats.append(cat.filename) return modified def asc_sync_and_rep (acat, shownmod=True, nmod=None): if acat.modcount: update_asc_hdr(acat) mkdirpath(os.path.dirname(acat.filename)) return sync_and_rep(acat, shownmod=shownmod, nmod=nmod) # ----------------------------------------------------------------------------- if __name__ == "__main__": exit_on_exception(main) diff --git a/scripts/poediff.py b/scripts/poediff.py index 4d395391..970ed087 100755 --- a/scripts/poediff.py +++ b/scripts/poediff.py @@ -1,511 +1,511 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- """ Create embedded diffs of PO files. Documented in C{doc/user/diffpatch.docbook#sec-dppatch}. 
@author: Chusslove Illich (Часлав Илић) @license: GPLv3 """ import filecmp import locale import os import shutil import sys try: import fallback_import_paths except: pass from pology import version, _, n_, t_ from pology.catalog import Catalog from pology.message import MessageUnsafe from pology.colors import ColorOptionParser, set_coloring_globals, cjoin import pology.config as pology_config from pology.fsops import str_to_unicode, collect_catalogs from pology.fsops import exit_on_exception from pology.diff import msg_ediff from pology.report import error, warning, report, format_item_list from pology.report import list_options from pology.report import init_file_progress from pology.stdcmdopt import add_cmdopt_colors from pology.vcs import available_vcs, make_vcs from pology.internal.poediffpatch import MPC, EDST from pology.internal.poediffpatch import msg_eq_fields, msg_copy_fields from pology.internal.poediffpatch import msg_clear_prev_fields from pology.internal.poediffpatch import diff_cats, diff_hdrs from pology.internal.poediffpatch import init_ediff_header from pology.internal.poediffpatch import get_msgctxt_for_headers from pology.internal.poediffpatch import cats_update_effort def main (): locale.setlocale(locale.LC_ALL, "") # Get defaults for command line options from global config. cfgsec = pology_config.section("poediff") def_do_merge = cfgsec.boolean("merge", True) # Setup options and parse the command line. usage = _("@info command usage", "%(cmd)s [OPTIONS] FILE1 FILE2\n" "%(cmd)s [OPTIONS] DIR1 DIR2\n" "%(cmd)s -c VCS [OPTIONS] [PATHS...]", cmd="%prog") desc = _("@info command description", "Create embedded diffs of PO files.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") showvcs = list(set(available_vcs()).difference(["none"])) showvcs.sort() opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-b", "--skip-obsolete", action="store_true", dest="skip_obsolete", default=False, help=_("@info command line option description", "Do not diff obsolete messages.")) opars.add_option( "-c", "--vcs", metavar=_("@info command line value placeholder", "VCS"), dest="version_control", help=_("@info command line option description", "Paths are under version control by given VCS; " "can be one of: %(vcslist)s.", vcslist=format_item_list(showvcs))) opars.add_option( "--list-options", action="store_true", dest="list_options", default=False, help=_("@info command line option description", "List the names of available options.")) opars.add_option( "--list-vcs", action="store_true", dest="list_vcs", default=False, help=_("@info command line option description", "List the keywords of known version control systems.")) opars.add_option( "-n", "--no-merge", action="store_false", dest="do_merge", default=def_do_merge, help=_("@info command line option description", "Do not try to indirectly pair messages by merging catalogs.")) opars.add_option( "-o", "--output", metavar=_("@info command line value placeholder", "POFILE"), dest="output", help=_("@info command line option description", "Output diff catalog to a file instead of stdout.")) opars.add_option( "-p", "--paired-only", action="store_true", dest="paired_only", default=False, help=_("@info command line option description", "When two directories are diffed, ignore catalogs which " "are not present in both directories.")) opars.add_option( "-q", 
"--quiet", action="store_true", dest="quiet", default=False, help=_("@info command line option description", "Do not display any progress info.")) opars.add_option( "-Q", "--quick", action="store_true", dest="quick", default=False, help=_("@info command line option description", "Equivalent to %(opt)s.", opt="-bns")) opars.add_option( "-r", "--revision", metavar=_("@info command line value placeholder", "REV1[:REV2]"), dest="revision", help=_("@info command line option description", "Revision from which to diff to current working copy, " "or from first to second revision (if VCS is given).")) opars.add_option( "-s", "--strip-headers", action="store_true", dest="strip_headers", default=False, help=_("@info command line option description", "Do not diff headers and do not write out the top header " "(resulting output cannot be used as patch).")) opars.add_option( "-U", "--update-effort", action="store_true", dest="update_effort", default=False, help=_("@info command line option description", "Instead of outputting the diff, calculate and output " "an estimate of the effort that was needed to update " "the translation from old to new paths. " "Ignores %(opt1)s and %(opt1)s options.", opt1="-b", opt2="-n")) add_cmdopt_colors(opars) (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:])) if op.list_options: report(list_options(opars)) sys.exit(0) if op.list_vcs: report("\n".join(showvcs)) sys.exit(0) # Could use some speedup. try: import psyco psyco.full() except ImportError: pass set_coloring_globals(ctype=op.coloring_type, outdep=(not op.raw_colors)) if op.quick: op.do_merge = False op.skip_obsolete = True op.strip_headers = True # Create VCS. vcs = None if op.version_control: if op.version_control not in available_vcs(flat=True): error_wcl(_("@info", "Unknown VCS '%(vcs)s' selected.", vcs=op.version_control)) vcs = make_vcs(op.version_control) # Sanity checks on paths. paths = free_args if not vcs: if len(paths) != 2: error_wcl(_("@info", "Exactly two paths are needed for diffing.")) for path in paths: if not os.path.exists(path): error_wcl("path does not exist: %s" % path) p1, p2 = paths if (not ( (os.path.isfile(p1) and (os.path.isfile(p2))) or (os.path.isdir(p1) and (os.path.isdir(p2)))) ): error_wcl(_("@info", "Both paths must be either files or directories.")) else: # Default to current working dir if no paths given. paths = paths or ["."] for path in paths: if not os.path.exists(path): error_wcl(_("@info", "Path '%(path)s' does not exist.", path=path)) if not vcs.is_versioned(path): error_wcl(_("@info", "Path '%(path)s' is not under version control.", path=path)) # Collect and pair PO files in given paths. # Each pair specification is in the form of # ((path1, path2), (vpath1, vpath2)) # where path* are the real paths, and vpath* the visual paths to be # presented in diff output. if not vcs: fpairs = collect_file_pairs(paths[0], paths[1], op.paired_only) pspecs = [(x, x) for x in fpairs] else: lst = op.revision and op.revision.split(":", 1) or [] if len(lst) > 2: error_wcl(_("@info", "Too many revisions given: %(revlist)s.", revspec=format_item_list(lst))) elif len(lst) == 2: revs = lst # diff between revisions elif len(lst) == 1: revs = [lst[0], None] # diff from revision to working copy else: revs = ["", None] # diff from head to working copy # Replace original paths with modified/added catalogs. 
paths_nc = [] for path in paths: for path in vcs.to_commit(path): if path.endswith(".po") or path.endswith(".pot"): paths_nc.append(path) paths = paths_nc paths.sort() pspecs = collect_pspecs_from_vcs(vcs, paths, revs, op.paired_only) if not op.update_effort: ecat, ndiffed = diff_pairs(pspecs, op.do_merge, colorize=(not op.output), shdr=op.strip_headers, noobs=op.skip_obsolete, quiet=op.quiet) if ndiffed > 0: hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field) lines = [] msgs = list(ecat) if not op.strip_headers: msgs.insert(0, ecat.header.to_msg()) for msg in msgs: if op.strip_headers and msg.msgctxt == hmsgctxt: sepl = [] sepl += [msg.manual_comment[0]] sepl += msg.msgid.split("\n")[:2] lines.extend(["# %s\n" % x for x in sepl]) lines.append("\n") else: lines.extend(msg.to_lines(force=True, wrapf=ecat.wrapf())) diffstr = cjoin(lines)[:-1] # remove last newline if op.output: file = open(op.output, "w") file.write(diffstr.encode(ecat.encoding())) file.close() else: report(diffstr) else: updeff = pairs_update_effort(pspecs, quiet=op.quiet) ls = [] for kw, desc, val, fmtval in updeff: ls.append(_("@info", "%(quantity)s: %(value)s", quantity=desc, value=fmtval)) report("\n".join(ls)) # Clean up. cleanup_tmppaths() def diff_pairs (pspecs, merge, colorize=False, wrem=True, wadd=True, shdr=False, noobs=False, quiet=False): # Create diffs of messages. # Note: Headers will be collected and diffed after all messages, # to be able to check if any decoration to their message keys is needed. wrappings = {} ecat = Catalog("", create=True, monitored=False) hspecs = [] ndiffed = 0 update_progress = None if len(pspecs) > 1 and not quiet: update_progress = init_file_progress([vp[1] for fp, vp in pspecs], addfmt=t_("@info:progress", "Diffing: %(file)s")) for fpaths, vpaths in pspecs: upprogf = None if update_progress: upprogf = lambda: update_progress(vpaths[1]) upprogf() # Quick check if files are binary equal. if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths): continue cats = [] for fpath in fpaths: try: cats.append(Catalog(fpath, create=True, monitored=False)) except: error_wcl(_("@info", "Cannot parse catalog '%(file)s'.", file=fpath), norem=[fpath]) tpos = len(ecat) cndiffed = diff_cats(cats[0], cats[1], ecat, merge, colorize, wrem, wadd, noobs, upprogf) hspecs.append(([not x.created() and x.header or None for x in cats], vpaths, tpos, cndiffed)) ndiffed += cndiffed # Collect and count wrapping policy used for to-catalog. wrapping = cats[1].wrapping() if wrapping not in wrappings: wrappings[wrapping] = 0 wrappings[wrapping] += 1 if update_progress: update_progress() # Find appropriate length of context for header messages. hmsgctxt = get_msgctxt_for_headers(ecat) init_ediff_header(ecat.header, hmsgctxt=hmsgctxt) # Create diffs of headers. # If some of the messages were diffed, # header must be added even if there is no difference. incpos = 0 for hdrs, vpaths, pos, cndiffed in hspecs: ehmsg, anydiff = diff_hdrs(hdrs[0], hdrs[1], vpaths[0], vpaths[1], hmsgctxt, ecat, colorize) if anydiff or cndiffed: ecat.add(ehmsg, pos + incpos) incpos += 1 # Add diffed headers to total count only if header stripping not in effect. if not shdr: ndiffed += incpos # Set the most used wrapping policy for the ediff catalog. if wrappings: wrapping = sorted(wrappings.items(), key=lambda x: x[1])[-1][0] ecat.set_wrapping(wrapping) if wrapping is not None: ecat.header.set_field(u"X-Wrapping", u", ".join(wrapping)) return ecat, ndiffed # Collect and pair catalogs as list [(fpath1, fpath2)]. 
# Where a pair cannot be found, empty string is given for path # (unless paired_only is True, when non-paired catalogs are ignored). def collect_file_pairs (dpath1, dpath2, paired_only): if os.path.isfile(dpath1): return [(dpath1, dpath2)] bysub1, bysub2 = map(collect_and_split_fpaths, (dpath1, dpath2)) # Try to pair files by subdirectories. # FIXME: Can and should anything smarter be done? fpairs = [] subdirs = list(set(bysub1.keys() + bysub2.keys())) subdirs.sort() for subdir in subdirs: flinks1 = bysub1.get(subdir, {}) flinks2 = bysub2.get(subdir, {}) filenames = list(set(flinks1.keys() + flinks2.keys())) filenames.sort() for filename in filenames: fpath1 = flinks1.get(filename, "") fpath2 = flinks2.get(filename, "") if not paired_only or (fpath1 and fpath2): fpairs.append((fpath1, fpath2)) return fpairs # Collect all catalog paths in given root, and construct mapping # {subdir: {filename: path}}, where subdir is relative to root. def collect_and_split_fpaths (dpath): dpath = dpath.rstrip(os.path.sep) + os.path.sep fpaths = collect_catalogs(dpath) bysub = {} for fpath in fpaths: if not fpath.startswith(dpath): error_wcl(_("@info", "Internal problem with path collection (200).")) subdir = os.path.dirname(fpath[len(dpath):]) if subdir not in bysub: bysub[subdir] = {} bysub[subdir][os.path.basename(fpath)] = fpath return bysub def collect_pspecs_from_vcs (vcs, paths, revs, paired_only): pspecs = [] # FIXME: Use tempfile module. expref = "/tmp/poediff-export-" exind = 0 for path in paths: expaths = {} for rev in revs: if rev is None: expaths[rev] = path else: expath = expref + "%d-%d-%s" % (os.getpid(), exind, rev) exind += 1 if os.path.isfile(path): expath += ".po" if not vcs.export(path, rev or None, expath): error_wcl(_("@info", "Cannot export path '%(path)s' " "in revision '%(rev)s'.", path=path, rev=rev)) record_tmppath(expath) expaths[rev] = expath expaths = [os.path.normpath(expaths[x]) for x in revs] fpairs = collect_file_pairs(expaths[0], expaths[1], paired_only) for fpair in fpairs: fpaths = [] vpaths = [] for fpath, expath, rev in zip(fpair, expaths, revs): if rev is not None: if not fpath: fpath_m = "" elif os.path.isdir(path): fpath_m = fpath[len(expath) + len(os.path.sep):] fpath_m = os.path.join(path, fpath_m) else: fpath_m = path rev_m = rev or vcs.revision(path) vpath = fpath_m + EDST.filerev_sep + rev_m else: vpath = fpath fpaths.append(fpath) vpaths.append(vpath) pspecs.append((fpaths, vpaths)) return pspecs def pairs_update_effort (pspecs, quiet=False): update_progress = None if len(pspecs) > 1 and not quiet: update_progress = init_file_progress([vp[1] for fp, vp in pspecs], addfmt=t_("@info:progress", "Diffing: %(file)s")) nntw_total = 0.0 for fpaths, vpaths in pspecs: upprogf = None if update_progress: upprogf = lambda: update_progress(vpaths[1]) upprogf() # Quick check if files are binary equal. if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths): continue cats = [] for fpath in fpaths: try: cats.append(Catalog(fpath, create=True, monitored=False)) except: error_wcl(_("@info", "Cannot parse catalog '%(file)s'.", file=fpath), norem=[fpath]) nntw = cats_update_effort(cats[0], cats[1], upprogf) nntw_total += nntw if update_progress: update_progress() updeff = [ ("nntw", _("@item", "nominal newly translated words"), nntw_total, "%.0f" % nntw_total), ] return updeff # Cleanup of temporary paths. 
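
collect_pspecs_from_vcs() above hand-rolls its /tmp/poediff-export-* names and carries a FIXME to use the tempfile module instead. A hedged sketch of what that might look like (helper name hypothetical; the real fix may need the file absent before vcs.export fills it):

import os
import tempfile

def make_export_path (rev, is_file):
    # Reserve a unique temporary path for a VCS export; keep the
    # .po suffix for single-file exports, as the original does.
    fd, expath = tempfile.mkstemp(prefix="poediff-export-%s-" % rev,
                                  suffix=(".po" if is_file else ""))
    os.close(fd)
    return expath
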
_tmppaths = set() def record_tmppath (path): _tmppaths.add(path) def cleanup_tmppaths (norem=set()): for path in _tmppaths: if path in norem: continue if os.path.isfile(path): os.unlink(path) elif os.path.isdir(path): shutil.rmtree(path) def error_wcl (msg, norem=set()): if not isinstance(norem, set): norem = set(norem) cleanup_tmppaths(norem) error(msg) if __name__ == '__main__': exit_on_exception(main, cleanup_tmppaths) diff --git a/scripts/poepatch.py b/scripts/poepatch.py index 59da4207..1f0bef93 100755 --- a/scripts/poepatch.py +++ b/scripts/poepatch.py @@ -1,824 +1,824 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- """ Patch PO files from an embedded diff. Documented in C{doc/user/diffpatch.docbook#sec-dpdiff}. @author: Chusslove Illich (Часлав Илић) @license: GPLv3 """ try: import fallback_import_paths except: pass import sys import os import locale import re from tempfile import NamedTemporaryFile from pology import version, _, n_ from pology.colors import ColorOptionParser from pology.report import error, warning, report from pology.msgreport import error_on_msg, warning_on_msg import pology.config as pology_config from pology.fsops import str_to_unicode, mkdirpath, collect_catalogs from pology.fsops import exit_on_exception from pology.catalog import Catalog from pology.message import Message, MessageUnsafe from pology.header import Header from pology.diff import msg_ediff, msg_ediff_to_new, msg_ediff_to_old from pology.internal.poediffpatch import MPC, EDST from pology.internal.poediffpatch import msg_eq_fields, msg_copy_fields from pology.internal.poediffpatch import msg_clear_prev_fields from pology.internal.poediffpatch import diff_cats from pology.internal.poediffpatch import init_ediff_header from pology.internal.poediffpatch import get_msgctxt_for_headers _flag_ediff = u"ediff" _flag_ediff_to_cur = u"%s-to-cur" % _flag_ediff _flag_ediff_to_new = u"%s-to-new" % _flag_ediff _flag_ediff_no_match = u"%s-no-match" % _flag_ediff _flags_all = ( _flag_ediff, _flag_ediff_to_cur, _flag_ediff_to_new, _flag_ediff_no_match, ) def main (): locale.setlocale(locale.LC_ALL, "") # Get defaults for command line options from global config. cfgsec = pology_config.section("poepatch") def_do_merge = cfgsec.boolean("merge", True) # Setup options and parse the command line. 
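
poepatch reads its option defaults from user configuration, just as poediff did above: a config value becomes the optparse default, and the command line option merely flips it. The wiring in a self-contained miniature (plain OptionParser standing in for ColorOptionParser):

from optparse import OptionParser

def_do_merge = True  # stands in for cfgsec.boolean("merge", True)

opars = OptionParser()
opars.add_option(
    "-n", "--no-merge",
    action="store_false", dest="do_merge", default=def_do_merge)

op, args = opars.parse_args(["-n"])
assert op.do_merge is False   # flipped from the configured default
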
usage = _("@info command usage", "%(cmd)s [OPTIONS] [OPTIONS] < EDIFF\n" "%(cmd)s -u [OPTIONS] PATHS...", cmd="%prog") desc = _("@info command description", "Apply embedded diff of PO files as patch.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-a", "--aggressive", action="store_true", dest="aggressive", default=False, help=_("@info command line option description", "Apply every message to its paired message in the target file, " "irrespective of whether its non-pairing parts match too.")) opars.add_option( "-d", "--directory", metavar=_("@info command line value placeholder", "DIR"), dest="directory", help=_("@info command line option description", "Prepend this directory path to any resolved target file path.")) opars.add_option( "-e", "--embed", action="store_true", dest="embed", default=False, help=_("@info command line option description", "Instead of applying resolved newer version of the message, " "add the full embedded diff into the target file.")) opars.add_option( "-i", "--input", metavar=_("@info command line value placeholder", "FILE"), dest="input", help=_("@info command line option description", "Read the patch from the given file instead of standard input.")) opars.add_option( "-n", "--no-merge", action="store_false", dest="do_merge", default=def_do_merge, help=_("@info command line option description", "Do not try to indirectly pair messages by merging catalogs.")) opars.add_option( "-p", "--strip", metavar=_("@info command line value placeholder", "NUM"), dest="strip", help=_("@info command line option description", "Strip the smallest prefix containing NUM leading slashes from " "each file name found in the ediff file (like in patch(1)). " "If not given, only the base name of each file is taken.")) opars.add_option( "-u", "--unembed", action="store_true", dest="unembed", default=False, help=_("@info command line option description", "Instead of applying a patch, resolve all embedded differences " "in given paths to newer versions of messages.")) (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:])) # Could use some speedup. try: import psyco psyco.full() except ImportError: pass if not op.unembed: if free_args: error(_("@info", "Too many arguments in command line: %(argspec)s", argspec=" ".join(free_args))) if op.strip and not op.strip.isdigit(): error(_("@info", "Option %(opt)s expects a positive integer value.", opt="--strip")) apply_ediff(op) else: paths = [] for path in free_args: if not os.path.exists(path): warning(_("@info", "Path '%(path)s' does not exist.", path=path)) if os.path.isdir(path): paths.extend(collect_catalogs(path)) else: paths.append(path) for path in paths: unembed_ediff(path) def apply_ediff (op): # Read the ediff PO. dummy_stream_path = "" if op.input: if not os.path.isfile(op.input): error(_("@info", "Path '%(path)s' is not a file or does not exist.", path=op.input)) edfpath = op.input readfh = None else: edfpath = dummy_stream_path readfh = sys.stdin try: ecat = Catalog(edfpath, monitored=False, readfh=readfh) except: error(_("@info ediff is shorthand for \"embedded difference\"", "Error reading ediff '%(file)s'.", file=edfpath)) # Split ediff by diffed catalog into original and new file paths, # header message, and ordinary messages. 
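
The splitting loop below closes each section when the next header message arrives, and appends an artificial end sentry so the final section is recorded like the rest. The pattern in isolation (plain lists, hypothetical is_header predicate):

def split_sections (items, is_header):
    # Group a flat item stream into (header, body) sections; the
    # sentry guarantees the last section is flushed like the others.
    SENTRY = object()
    sections, current = [], None
    for item in list(items) + [SENTRY]:
        if item is SENTRY or is_header(item):
            if current is not None:
                sections.append(current)
            if item is SENTRY:
                break
            current = (item, [])
        elif current is not None:
            current[1].append(item)
    return sections
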
hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field) if hmsgctxt is None: error(_("@info", "Header field '%(field)s' is missing in the ediff.", field=EDST.hmsgctxt_field)) edsplits = [] cehmsg = None smsgid = u"\x00" ecat.add_last(MessageUnsafe(dict(msgctxt=hmsgctxt, msgid=smsgid))) # sentry for emsg in ecat: if emsg.msgctxt == hmsgctxt: if cehmsg: # Record previous section. edsplits.append((fpaths, cehmsg, cemsgs)) if emsg.msgid == smsgid: # end sentry, avoid parsing below break # Mine original and new file paths out of header. fpaths = [] for fpath in emsg.msgid.split("\n")[:2]: # Strip leading "+ "/"- " fpath = fpath[2:] # Convert to planform path separators. fpath = re.sub(r"/+", os.path.sep, fpath) # Remove revision indicator. p = fpath.find(EDST.filerev_sep) if p >= 0: fpath = fpath[:p] # Strip path and append directory as requested. if op.strip: preflen = int(op.strip) lst = fpath.split(os.path.sep, preflen) if preflen + 1 == len(lst): fpath = lst[preflen] else: fpath = os.path.basename(fpath) else: fpath = os.path.basename(fpath) if op.directory and fpath: fpath = os.path.join(op.directory, fpath) # All done. fpaths.append(fpath) cehmsg = emsg cemsgs = [] else: cemsgs.append(emsg) # Prepare catalog for rejects and merges. rcat = Catalog("", create=True, monitored=False, wrapping=ecat.wrapping()) init_ediff_header(rcat.header, hmsgctxt=hmsgctxt, extitle="rejects") # Apply diff to catalogs. for fpaths, ehmsg, emsgs in edsplits: # Open catalog for patching. fpath1, fpath2 = fpaths if fpath1: # Diff from an existing catalog, open it. if not os.path.isfile(fpath1): warning(_("@info", "Path '%(path)s' is not a file or does not exist, " "skipping it.", path=fpath1)) continue try: cat = Catalog(fpath1) except: warning(_("@info", "Error reading catalog '%(file)s', skipping it.", file=fpath1)) continue elif fpath2: # New catalog added in diff, create it (or open if it exists). try: mkdirpath(os.path.dirname(fpath2)) cat = Catalog(fpath2, create=True) if cat.created(): cat.set_wrapping(ecat.wrapping()) except: if os.path.isfile(fpath2): warning(_("@info", "Error reading catalog '%(file)s', skipping it.", file=fpath1)) else: warning(_("@info", "Cannot create catalog '%(file)s', skipping it.", file=fpath2)) continue else: error(_("@info", "Both catalogs in ediff indicated not to exist.")) # Do not try to patch catalog with embedded differences # (i.e. previously patched using -e). if cat.header.get_field_value(EDST.hmsgctxt_field) is not None: warning(_("@info", "Catalog '%(file)s' already contains " "embedded differences, skipping it.", file=cat.filename)) continue # Do not try to patch catalog if the patch contains # unresolved split differences. if reduce(lambda r, x: r or _flag_ediff_to_new in x.flag, emsgs, False): warning(_("@info", "Patch for catalog '%(file)s' contains unresolved " "split differences, skipping it.", file=cat.filename)) continue # Patch the catalog. 
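
One slip in the create branch above: when reading an existing fpath2 fails, the warning reports file=fpath1. A reduced open-or-create sketch with the failing path reported consistently (hypothetical helper; the original additionally distinguishes a cannot-create case):

def open_or_create (fpath, ecat):
    try:
        mkdirpath(os.path.dirname(fpath))
        cat = Catalog(fpath, create=True)
        if cat.created():
            cat.set_wrapping(ecat.wrapping())
        return cat
    except:
        warning(_("@info",
                  "Error reading catalog '%(file)s', skipping it.",
                  file=fpath))  # always name the path that failed
        return None
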
rejected_ehmsg = patch_header(cat, ehmsg, ecat, op) rejected_emsgs_flags = patch_messages(cat, emsgs, ecat, op) any_rejected = rejected_ehmsg or rejected_emsgs_flags if fpath2 or any_rejected: created = cat.created() if cat.sync(): if not created: if any_rejected and op.embed: report(_("@info:progress E is for \"with embedding\"", "Partially patched (E): %(file)s", file=cat.filename)) elif any_rejected: report(_("@info:progress", "Partially patched: %(file)s", file=cat.filename)) elif op.embed: report(_("@info:progress E is for \"with embedding\"", "Patched (E): %(file)s", file=cat.filename)) else: report(_("@info:progress", "Patched: %(file)s", file=cat.filename)) else: if op.embed: report(_("@info:progress E is for \"with embedding\"", "Created (E): %(file)s", file=cat.filename)) else: report(_("@info:progress", "Created: %(file)s", file=cat.filename)) else: pass #report("unchanged: %s" % cat.filename) else: os.unlink(fpath1) report(_("@info:progress", "Removed: %(file)s", file=fpath1)) # If there were any rejects and reembedding is not in effect, # record the necessary to present them. if any_rejected and not op.embed: if not rejected_ehmsg: # Clean header diff. ehmsg.manual_comment = ehmsg.manual_comment[:1] ehmsg.msgstr[0] = u"" rcat.add_last(ehmsg) for emsg, flag in rejected_emsgs_flags: # Reembed to avoid any conflicts. msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg) emsg = msg_ediff(msg1_s, msg2_s, emsg=msg2_s, ecat=rcat, enoctxt=hmsgctxt) if flag: emsg.flag.add(flag) rcat.add_last(emsg) # If there were any rejects, write them out. if len(rcat) > 0: # Construct paths for embedded diffs of rejects. rsuff = "rej" if ecat.filename != dummy_stream_path: rpath = ecat.filename p = rpath.rfind(".") if p < 0: p = len(rpath) rpath = rpath[:p] + (".%s" % rsuff) + rpath[p:] else: rpath = "stdin.%s.po" % rsuff rcat.filename = rpath rcat.sync(force=True, noobsend=True) report(_("@info:progress file to which rejected parts of the patch " "have been written to", "*** Rejects: %(file)s", file=rcat.filename)) # Patch application types. _pt_merge, _pt_insert, _pt_remove = range(3) def patch_messages (cat, emsgs, ecat, options): # It may happen that a single message from original catalog # is paired with more than one from the diff # (e.g. single old translated message going into two new fuzzy). # Therefore paired messages must be tracked, to know if patched # message can be merged into the existing, or it must be inserted. pmsgkeys = set() # Triplets for splitting directly unapplicable patches into two. # Delay building of triplets until needed for the first time. striplets_pack = [None] def striplets (): if striplets_pack[0] is None: striplets_pack[0] = build_splitting_triplets(emsgs, cat, options) return striplets_pack[0] # Check whether diffs apply, and where and how if they do. rejected_emsgs_flags = [] patch_specs = [] for emsg in emsgs: pspecs = msg_apply_diff(cat, emsg, ecat, pmsgkeys, striplets) for pspec in pspecs: emsg_m, flag = pspec[:2] if flag == _flag_ediff or options.embed: patch_specs.append(pspec) if flag != _flag_ediff: rejected_emsgs_flags.append((emsg_m, flag)) # Sort accepted patches by position of application. patch_specs.sort(key=lambda x: x[3]) # Add accepted patches to catalog. incpos = 0 for emsg, flag, typ, pos, msg1, msg2, msg1_s, msg2_s in patch_specs: if pos is not None: pos += incpos if options.embed: # Embedded diff may conflict one of the messages in catalog. 
# Make a new diff of special messages, # and embed them either into existing message in catalog, # or into new message. if typ == _pt_merge: tmsg = cat[pos] tpos = pos else: tmsg = MessageUnsafe(msg2 or {}) tpos = None emsg = msg_ediff(msg1_s, msg2_s, emsg=tmsg, ecat=cat, eokpos=tpos) if 0:pass elif typ == _pt_merge: if not options.embed: cat[pos].set_inv(msg2) else: cat[pos].flag.add(flag) elif typ == _pt_insert: if not options.embed: cat.add(Message(msg2), pos) else: cat.add(Message(emsg), pos) cat[pos].flag.add(flag) incpos += 1 elif typ == _pt_remove: if pos is None: continue if not options.embed: cat.remove(pos) incpos -= 1 else: cat[pos].flag.add(flag) else: error_on_msg(_("@info", "Unknown patch type %(type)s.", type=typ), emsg, ecat) return rejected_emsgs_flags def msg_apply_diff (cat, emsg, ecat, pmsgkeys, striplets): msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg) # Try to select existing message from the original messages. # Order is important, should try first new, then old # (e.g. if an old fuzzy was resolved to new after diff was made). msg = None if msg2 and msg2 in cat: msg = cat[msg2] elif msg1 and msg1 in cat: msg = cat[msg1] patch_specs = [] # Try to apply the patch. if msg_patchable(msg, msg1, msg2): # Patch can be directly applied. if msg1 and msg2: if msg.key not in pmsgkeys: typ = _pt_merge pos = cat.find(msg) pmsgkeys.add(msg.key) else: typ = _pt_insert pos, weight = cat.insertion_inquiry(msg2) elif msg2: # patch adds a message if msg: typ = _pt_merge pos = cat.find(msg) pmsgkeys.add(msg.key) else: typ = _pt_insert pos, weight = cat.insertion_inquiry(msg2) elif msg1: # patch removes a message if msg: typ = _pt_remove pos = cat.find(msg) pmsgkeys.add(msg.key) else: typ = _pt_remove pos = None # no position to remove from else: # Cannot happen. error_on_msg(_("@info", "Neither the old nor the new message " "in the diff is indicated to exist."), emsg, ecat) patch_specs.append((emsg, _flag_ediff, typ, pos, msg1, msg2, msg1_s, msg2_s)) else: # Patch cannot be applied directly, # try to split into old-to-current and current-to-new diffs. split_found = False if callable(striplets): striplets = striplets() # delayed creation of splitting triplets for i in range(len(striplets)): m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2 = striplets[i] if msg1.inv == m1_t.inv and msg2.inv == m2_t.inv: striplets.pop(i) # remove to not slow further searches split_found = True break if split_found: # Construct new corresponding diffs. em_1c = msg_ediff(m1_ts, m_ts1, emsg=MessageUnsafe(m_t)) em_c2 = msg_ediff(m_ts2, m2_ts, emsg=MessageUnsafe(m2_t)) # Current-to-new can be merged or inserted, # and old-to-current is then inserted just before it. if m_t.key not in pmsgkeys: typ = _pt_merge pos = cat.find(m_t) pmsgkeys.add(m_t.key) else: typ = _pt_insert pos, weight = cat.insertion_inquiry(m2_t) # Order of adding patch specs here important for rejects file. patch_specs.append((em_1c, _flag_ediff_to_cur, _pt_insert, pos, m1_t, m_t, m1_ts, m_ts1)) patch_specs.append((em_c2, _flag_ediff_to_new, typ, pos, m_t, m2_t, m_ts2, m2_ts)) # The patch is totally rejected. # Will be inserted if reembedding requested, so compute insertion. 
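
The striplets_pack one-element list in patch_messages() above is a Python 2 substitute for nonlocal: the closure mutates the cell instead of rebinding a name, so the expensive triplets are built only on first use. The trick extracted (hypothetical factory):

def make_lazy (build):
    cell = [None]               # writable cell visible to the closure
    def get ():
        if cell[0] is None:
            cell[0] = build()   # built on first use only
        return cell[0]
    return get

# e.g. striplets = make_lazy(expensive_build)
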
if not patch_specs: typ = _pt_insert if msg2 is not None: pos, weight = cat.insertion_inquiry(msg2) else: pos = len(cat) patch_specs.append((emsg, _flag_ediff_no_match, typ, pos, msg1, msg2, msg1_s, msg2_s)) return patch_specs def msg_patchable (msg, msg1, msg2): # Check for cases where current message does not match old or new, # but there is a transformation that can also be cleanly merged. msg_m = msg if 0: pass # Old and new are translated, but current is fuzzy and has previous fields. # Transform current to its previous state, from which it may have became # fuzzy by merging with templates. elif ( msg and msg.fuzzy and msg.key_previous is not None and msg1 and not msg1.fuzzy and msg2 and not msg2.fuzzy ): msg_m = MessageUnsafe(msg) msg_copy_fields(msg, msg_m, MPC.prevcurr_fields) msg_clear_prev_fields(msg_m) msg_m.fuzzy = False # Old is None, new is translated, and current is untranslated. # Add translation of new to current, since it may have been added as # untranslated after merging with templates. elif msg and msg.untranslated and not msg1 and msg2 and msg2.translated: msg_m = MessageUnsafe(msg) msg_copy_fields(msg2, msg_m, ["msgstr"]) if msg1 and msg2: return msg and msg_m.inv in (msg1.inv, msg2.inv) elif msg2: return not msg or msg_m.inv == msg2.inv elif msg1: return not msg or msg_m.inv == msg1.inv else: return not msg def resolve_diff_pair (emsg): # Recover old and new message according to diff. # Resolve into copies of ediff message, to preserve non-inv parts. emsg1 = MessageUnsafe(emsg) msg1_s = msg_ediff_to_old(emsg1, rmsg=emsg1) emsg2 = MessageUnsafe(emsg) msg2_s = msg_ediff_to_new(emsg2, rmsg=emsg2) # Resolve any special pairings. msg1, msg2 = msg1_s, msg2_s if not msg1_s or not msg2_s: # No special cases if either message non-existant. pass # Cases f-nf-*. elif msg1_s.fuzzy and not msg2_s.fuzzy: # Case f-nf-ecc. if ( msg2_s.key_previous is None and not msg_eq_fields(msg1_s, msg2_s, MPC.curr_fields) ): msg1 = MessageUnsafe(msg1_s) msg_copy_fields(msg1_s, msg1, MPC.currprev_fields) msg_copy_fields(msg2_s, msg1, MPC.curr_fields) # Case f-nf-necc. elif msg2_s.key_previous is not None: msg1 = MessageUnsafe(msg1_s) msg2 = MessageUnsafe(msg2_s) msg_copy_fields(msg2_s, msg1, MPC.prevcurr_fields) msg_clear_prev_fields(msg2) # Cases nf-f-*. elif not msg1_s.fuzzy and msg2_s.fuzzy: # Case nf-f-ecp. if ( msg1_s.key_previous is None and not msg_eq_fields(msg1_s, msg2_s, MPC.curr_fields) ): msg2 = MessageUnsafe(msg2_s) msg_copy_fields(msg1_s, msg2, MPC.currprev_fields) # Case nf-f-necp. elif msg1_s.key_previous is not None: msg1 = MessageUnsafe(msg1_s) msg2 = MessageUnsafe(msg2_s) msg_copy_fields(msg1_s, msg2, MPC.prev_fields) msg_clear_prev_fields(msg1) return msg1, msg2, msg1_s, msg2_s def build_splitting_triplets (emsgs, cat, options): # Create catalogs of old and new messages. cat1 = Catalog("", create=True, monitored=False) cat2 = Catalog("", create=True, monitored=False) for emsg in emsgs: msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg) if msg1: cat1.add_last(msg1) if msg2: cat2.add_last(msg2) # Make headers same, to avoid any diffs there. cat1.header = cat.header cat2.header = cat.header # Write created catalogs to disk if # msgmerge may be used on files during diffing. 
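
In the do_merge branch below, the tmpfs list exists only to keep the NamedTemporaryFile objects referenced: the file vanishes as soon as its object is garbage collected. The pitfall in miniature:

from tempfile import NamedTemporaryFile

tmpf = NamedTemporaryFile(prefix="poepatch-split-", suffix=".po")
path = tmpf.name   # usable only while `tmpf` stays referenced
# ... work with `path` ...
tmpf.close()       # the file is removed here at the latest
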
if options.do_merge: tmpfs = [] # to avoid garbage collection until the function returns for tcat, tsuff in ((cat1, "1"), (cat2, "2")): tmpf = NamedTemporaryFile(prefix="poepatch-split-%s-" % tsuff, suffix=".po") tmpfs.append(tmpf) tcat.filename = tmpf.name tcat.sync(force=True) # Create the old-to-current and current-to-new diffs. ecat_1c = Catalog("", create=True, monitored=False) diff_cats(cat1, cat, ecat_1c, options.do_merge, wadd=False, wrem=False) ecat_c2 = Catalog("", create=True, monitored=False) diff_cats(cat, cat2, ecat_c2, options.do_merge, wadd=False, wrem=False) # Mine splitting triplets out of diffs. sdoublets_1c = {} for emsg in ecat_1c: m1_t, m_t, m1_ts, m_ts1 = resolve_diff_pair(emsg) sdoublets_1c[m_t.key] = [m1_t, m1_ts, m_t, m_ts1] sdoublets_c2 = {} for emsg in ecat_c2: m_t, m2_t, m_ts2, m2_ts = resolve_diff_pair(emsg) sdoublets_c2[m_t.key] = [m_t, m_ts2, m2_t, m2_ts] common_keys = set(sdoublets_1c).intersection(sdoublets_c2) striplets = [] for key in common_keys: m1_t, m1_ts, m_t, m_ts1 = sdoublets_1c[key] m_t, m_ts2, m2_t, m2_ts = sdoublets_c2[key] striplets.append((m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2)) return striplets def patch_header (cat, ehmsg, ecat, options): if not ehmsg.msgstr[0]: # no header diff, only metadata return None ehmsg_clean = clear_header_metadata(ehmsg) # Create reduced headers. hmsg1 = msg_ediff_to_old(ehmsg_clean) hmsg2 = msg_ediff_to_new(ehmsg_clean) hmsg = not cat.created() and cat.header.to_msg() or None hdrs = [] for m in (hmsg, hmsg1, hmsg2): h = m is not None and reduce_header_fields(Header(m)) or None hdrs.append(h) rhdr, rhdr1, rhdr2 = hdrs # Decide if the header can be cleanly patched. clean = False if not rhdr: clean = rhdr1 or rhdr2 else: clean = (rhdr1 and rhdr == rhdr1) or (rhdr2 and rhdr == rhdr2) if clean: if not options.embed: if hmsg2: cat.header = Header(hmsg2) else: # Catalog will be removed if no messages are rejected, # and otherwise the header should stay as-is. pass else: if cat.created(): cat.header = Header(hmsg2) ehmsg = MessageUnsafe(ehmsg) ehmsg.flag.add(_flag_ediff) hmsgctxt = get_msgctxt_for_headers(cat) ehmsg.msgctxt = hmsgctxt cat.header.set_field(EDST.hmsgctxt_field, hmsgctxt) cat.add(Message(ehmsg), 0) return None else: return ehmsg # Clear header diff message of metadata. # A copy of the message is returned. def clear_header_metadata (ehmsg): ehmsg = MessageUnsafe(ehmsg) ehmsg.manual_comment.pop(0) ehmsg.msgctxt = None ehmsg.msgid = u"" return ehmsg # Remove known unimportant fields from the header, # to ignore them on comparisons. def reduce_header_fields (hdr): rhdr = Header(hdr) for field in ( "POT-Creation-Date", "PO-Revision-Date", "Last-Translator", "X-Generator", ): rhdr.remove_field(field) return rhdr def unembed_ediff (path, all=False, old=False): try: cat = Catalog(path) except: warning(_("@info", "Error reading catalog '%(file)s', skipping it.", file=path)) return hmsgctxt = cat.header.get_field_value(EDST.hmsgctxt_field) if hmsgctxt is not None: cat.header.remove_field(EDST.hmsgctxt_field) uehmsg = None unembedded = {} for msg in cat: ediff_flag = None for flag in _flags_all: if flag in msg.flag: ediff_flag = flag msg.flag.remove(flag) if not ediff_flag and not all: continue if ediff_flag in (_flag_ediff_no_match, _flag_ediff_to_new): # Throw away fully rejected embeddings, i.e. reject the patch. # For split-difference embeddings, throw away the current-to-new; # this effectively rejects the patch, which is safest thing to do. 
cat.remove_on_sync(msg) elif hmsgctxt is not None and msg.msgctxt == hmsgctxt: if uehmsg: warning_on_msg(_("@info", "Unembedding results in duplicate header, " "previous header at %(line)d(#%(entry)d); " "skipping it.", line=uehmsg.refline, entry=uehmsg.refentry), msg, cat) return msg_ediff_to_x = not old and msg_ediff_to_new or msg_ediff_to_old hmsg = msg_ediff_to_x(clear_header_metadata(msg)) if hmsg.msgstr and hmsg.msgstr[0]: cat.header = Header(hmsg) cat.remove_on_sync(msg) uehmsg = msg else: msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(msg) tmsg = (not old and (msg2,) or (msg1,))[0] if tmsg is not None: if tmsg.key in unembedded: msg_p = unembedded[tmsg.key] warning_on_msg(_("@info", "Unembedding results in " "duplicate message, previous message " "at %(line)d(#%(entry)d); skipping it.", line=msg_p.refline, entry=msg_p.refentry), msg, cat) return msg.set(Message(msg2)) unembedded[tmsg.key] = msg else: cat.remove_on_sync(msg) if cat.sync(): report(_("@info:progress", "Unembedded: %(file)s", file=cat.filename)) if __name__ == '__main__': exit_on_exception(main) diff --git a/scripts/pomtrans.py b/scripts/pomtrans.py index ced81e3e..026c6c7e 100755 --- a/scripts/pomtrans.py +++ b/scripts/pomtrans.py @@ -1,528 +1,528 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- """ Perform machine translation of PO files. Documented in C{doc/user/lingo.docbook#sec-lgmtrans}. @author: Chusslove Illich (Часлав Илић) @license: GPLv3 """ try: import fallback_import_paths except: pass import locale import subprocess import sys import os from pology import datadir, version, _, n_ from pology.catalog import Catalog from pology.colors import ColorOptionParser import pology.config as pology_config from pology.entities import read_entities from pology.fsops import collect_catalogs, collect_system from pology.fsops import str_to_unicode from pology.fsops import exit_on_exception from pology.message import MessageUnsafe from pology.remove import remove_accel_msg from pology.report import report, error, warning from pology.resolve import resolve_entities_simple def main (): locale.setlocale(locale.LC_ALL, "") # Get defaults for command line options from global config. cfgsec = pology_config.section("pomtrans") showservs = list() showservs.sort() # Setup options and parse the command line. usage = _("@info command usage", "%(cmd)s [OPTIONS] TRANSERV PATHS...", cmd="%prog") desc = _("@info command description", "Perform machine translation of PO files.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-a", "--accelerator", dest="accel", metavar=_("@info command line value placeholder", "CHAR"), help=_("@info command line option description", "Accelerator marker character used in messages. 
" "Detected from catalogs if not given.")) opars.add_option( "-c", "--parallel-compendium", dest="parcomp", metavar=_("@info command line value placeholder", "FILE"), help=_("@info command line option description", "Translate from translation to another language, " "found in compendium file at the given path.")) opars.add_option( "-l", "--list-transervs", action="store_true", dest="list_transervs", default=False, help="List available translation services.") opars.add_option( "-m", "--flag-%s" % _flag_mtrans, action="store_true", dest="flag_mtrans", default=False, help=_("@info command line option description", "Add '%(flag)s' flag to translated messages.", flag=_flag_mtrans)) opars.add_option( "-M", "--translation-mode", dest="tmode", metavar=_("@info command line value placeholder", "MODE"), help=_("@info command line option description", "Translation mode for the chosen translation service. " "Overrides the default translation mode constructed " "based on source and target language. " "Mode string format is translation service dependent.")) opars.add_option( "-n", "--no-fuzzy-flag", action="store_false", dest="flag_fuzzy", default=True, help=_("@info command line option description", "Do not add '%(flag)s' flag to translated messages.", flag="fuzzy")) opars.add_option( "-p", "--parallel-catalogs", dest="parcats", metavar=_("@info command line value placeholder", "SEARCH:REPLACE"), help=_("@info command line option description", "Translate from translation to another language " "found in parallel catalogs. " "For given target catalog path, the path to parallel catalog " "is constructed by replacing once SEARCH with REPLACE.")) opars.add_option( "-s", "--source-lang", dest="slang", metavar=_("@info command line value placeholder", "LANG"), help=_("@info command line option description", "Source language code. " "Detected from catalogs if not given.")) opars.add_option( "-t", "--target-lang", dest="tlang", metavar=_("@info command line value placeholder", "LANG"), help=_("@info command line option description", "Target language code. " "Detected from catalogs if not given.")) opars.add_option( "-T", "--transerv-bin", dest="transerv_bin", metavar=_("@info command line value placeholder", "PATH"), help=_("@info command line option description", "Custom path to translation service executable " "(where applicable).")) opars.add_option( "-d", "--data-directory", dest="data_directory", metavar=_("@info command line value placeholder", "FOLDER"), help=_("@info command line option description", "Custom path to a translation data directory (where applicable).")) (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:])) # Could use some speedup. try: import psyco psyco.full() except ImportError: pass if op.list_transervs: report("\n".join(sorted(_known_transervs.keys()))) sys.exit(0) if len(free_args) < 1: error(_("@info", "Translation service not specified.")) transervkey = free_args.pop(0) if transervkey not in _known_transervs: error(_("@info", "Translation service '%(serv)s' not known.", serv=transervkey)) tsbuilder_wopts = _known_transervs[transervkey] tsbuilder = lambda slang, tlang: tsbuilder_wopts(slang, tlang, op) paths = free_args if not op.parcomp and not op.parcats: translate_direct(paths, tsbuilder, op) else: translate_parallel(paths, tsbuilder, op) def translate_direct (paths, tsbuilder, options): transervs = {} catpaths = collect_catalogs(paths) for catpath in catpaths: # Collect messages and texts to translate. 
cat = Catalog(catpath) if options.accel is not None: # force explicitly given accelerator cat.set_accelerator(options.accel) texts = [] msgs = [] for msg in cat: if to_translate(msg, options): msgf = MessageUnsafe(msg) remove_accel_msg(msgf, cat) texts.append(msgf.msgid) if msg.msgid_plural is not None: texts.append(msgf.msgid_plural) msgs.append(msg) # Translate collected texts. slang = options.slang or "en" transerv = get_transerv(slang, options.tlang, cat, cat, tsbuilder) texts_tr = transerv.translate(texts) if texts else [] if texts_tr is None: warning(_("@info", "Translation service failure on '%(file)s'.", file=catpath)) continue for i, text in enumerate(texts_tr): text = reduce_for_encoding(text, cat.encoding()) texts_tr[i] = text # Put translated texts into messages. singlepls = cat.plural_indices_single() for msg in msgs: msgid_tr = texts_tr.pop(0) if msg.msgid_plural is not None: msgid_plural_tr = texts_tr.pop(0) if msgid_tr: if msg.msgid_plural is not None: for i in range(len(msg.msgstr)): if i in singlepls: msg.msgstr[i] = msgid_tr else: msg.msgstr[i] = msgid_plural_tr else: msg.msgstr[0] = msgid_tr decorate(msg, options) sync_rep(cat, msgs) def translate_parallel (paths, tsbuilder, options): pathrepl = options.parcats comppath = options.parcomp slang = options.slang tlang = options.tlang ccat = None if comppath is not None: if not os.path.isfile(comppath): error(_("@info", "Compendium '%(file)s' does not exist.", file=comppath)) ccat = Catalog(comppath, monitored=False) if pathrepl is not None: lst = pathrepl.split(":") if len(lst) != 2: error(_("@info", "Invalid search and replace specification '%(spec)s'.", spec=pathrepl)) pathsrch, pathrepl = lst catpaths = collect_catalogs(paths) for catpath in catpaths: # Open parallel catalog if it exists. pcat = None if pathrepl is not None: pcatpath = catpath.replace(pathsrch, pathrepl, 1) if catpath == pcatpath: error(_("@info", "Parallel catalog and target catalog are same files " "for '%(file)s'.", file=catpath)) if os.path.isfile(pcatpath): pcat = Catalog(pcatpath, monitored=False) # If there is neither the parallel catalog nor the compendium, # skip processing current target catalog. if not pcat and not ccat: continue # Collect messages and texts to translate. cat = Catalog(catpath) pmsgs, psmsgs, ptexts = [], [], [] cmsgs, csmsgs, ctexts = [], [], [] for msg in cat: if to_translate(msg, options): # Priority: parallel catalog, then compendium. for scat, msgs, smsgs, texts in ( (pcat, pmsgs, psmsgs, ptexts), (ccat, cmsgs, csmsgs, ctexts), ): if scat and msg in scat: smsg = scat[msg] if smsg.translated: msgs.append(msg) smsgs.append(smsg) texts.extend(smsg.msgstr) break # Translate collected texts. texts_tr = [] for texts, scat in ((ptexts, pcat), (ctexts, ccat)): transerv = get_transerv(slang, tlang, scat, cat, tsbuilder) texts_tr.append(transerv.translate(texts) if texts else []) if texts_tr[-1] is None: texts_tr = None break if texts_tr is None: warning(_("@info", "Translation service failure on '%(file)s'.", file=catpath)) continue ptexts_tr, ctexts_tr = texts_tr # Put translated texts into messages. # For plural messages, assume 1-1 match to parallel language. 
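
reduce_for_encoding(), used above and defined further below, is meant to blank out unencodable spans, but its `finally: break` leaves the while-loop after the first iteration, so only the first bad span is replaced. If looping until the text is clean is the intent, a corrected sketch:

def reduce_for_encoding (text, enc):
    # Replace every span that cannot be encoded with question marks.
    while True:
        try:
            text.encode(enc)
        except UnicodeEncodeError, e:
            text = text[:e.start] + ("?" * (e.end - e.start)) + text[e.end:]
        else:
            break   # no error left, text is encodable
    return text
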
for msgs, smsgs, texts in ( (pmsgs, psmsgs, ptexts_tr), (cmsgs, csmsgs, ctexts_tr), ): for msg, smsg in zip(msgs, smsgs): ctexts = [] for i in range(len(smsg.msgstr)): text = texts.pop(0) text = reduce_for_encoding(text, cat.encoding()) ctexts.append(text) for i in range(len(msg.msgstr)): msg.msgstr[i] = i < len(ctexts) and ctexts[i] or ctexts[-1] decorate(msg, options) sync_rep(cat, pmsgs + cmsgs) def to_translate (msg, options): return msg.untranslated _flag_mtrans = u"mtrans" def decorate (msg, options): msg.unfuzzy() # clear any previous fuzzy stuff if options.flag_fuzzy: msg.fuzzy = True if options.flag_mtrans: msg.flag.add(_flag_mtrans) # Cache of translation services by (source, target) language pair. _transervs = {} # Return translation service for (slang, tlang) pair. # If the service was not created yet, create it and cache it. # If slang or tlang are None, use target language of corresponding catalog. def get_transerv (slang, tlang, scat, tcat, tsbuilder): if not slang: slang = scat.header.get_field_value("Language") if not slang: error(_("@info", "Cannot determine language of source catalog '%(file)s'.", file=scat.filename)) if not tlang: tlang = tcat.header.get_field_value("Language") if not tlang: error(_("@info", "Cannot determine language of target catalog '%(file)s'.", file=tcat.filename)) trdir = (slang, tlang) if trdir not in _transervs: _transervs[trdir] = tsbuilder(slang, tlang) return _transervs[trdir] def sync_rep (cat, mmsgs): if cat.sync(): report("! %s (%s)" % (cat.filename, len(mmsgs))) def reduce_for_encoding (text, enc): while True: try: text.encode(enc) except UnicodeEncodeError, e: start, end = e[2], e[3] text = text[:start] + ("?" * (end - start)) + text[end:] finally: break return text # ---------------------------------------- # Apertium -- a free/open-source machine translation platform # http://www.apertium.org/ class Translator_apertium (object): def __init__ (self, slang, tlang, options): cmdpath = options.transerv_bin or "apertium" try: subprocess.call(cmdpath, stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError: error(_("@info Apertium is machine translation software", "Apertium executable not found at '%(path)s'.", path=cmdpath)) if options.tmode is not None: mode = options.tmode else: mode = "%s-%s" % (slang, tlang) optional_parameters = u"" if options.data_directory: optional_parameters = u"-d %s" % options.data_directory self.cmdline = u"%s -u -f html-noent %s %s" % ( cmdpath, optional_parameters, mode) entpath = os.path.join(datadir(), "spec", "html.entities") self.htmlents = read_entities(entpath) def translate (self, texts): # Serialize texts to send to Apertium in one go. # Separate texts with an inplace tag followed by dot, # to have each text interpreted as standalone sentence. # FIXME: Any way to really translate each text in turn, # without it being horribly slow? sep0 = "
." sep = None nsep = 0 while not sep: # determine shortest acceptable separator sep = sep0 + sep1 * nsep + sep2 for text in texts: if sep in text: sep = None nsep += 1 break stext = sep.join(texts) # Translate empty string to test language pair. # Otherwise, if a lot of text is sent and language pair not good, # Apertium may just signal broken pipe. res = collect_system(self.cmdline, instr="") if res[2] != 0: warning(_("@info", "Executing Apertium failed:\n%(output)s", output=res[0])) # ...really res[0], error is output to stdout. Tsk. return None res = collect_system(self.cmdline, instr=stext) if res[2] != 0: warning(_("@info", "Executing Apertium failed:\n%(output)s", output=res[0])) # ...really res[0], error is output to stdout. Tsk. return None texts_tr = res[0].split(sep) if len(texts_tr) != len(texts): warning(_("@info", "Apertium reported wrong number of translations, " "%(num1)d instead of %(num2)d.", num1=len(texts_tr), num2=len(texts))) return None texts_tr = [resolve_entities_simple(x, self.htmlents) for x in texts_tr] return texts_tr # ---------------------------------------- # Google Translate # http://translate.google.com # Communication code derived from py-gtranslate library # http://code.google.com/p/py-gtranslate/ # Updated for v2.0 API by Víctor R. Rodríguez Domínguez # http://vrdominguez.es class Translator_google (object): def __init__ (self, slang, tlang, options): if options.tmode is not None: ( self.lang_in, self.lang_out ) = options.tmode.split('|') else: self.lang_in = slang self.lang_out = tlang self.apikey = pology_config.section("pomtrans").string("google-api-key") def translate (self, texts): import urllib try: import simplejson except: error(_("@info", "Python module '%(mod)s' not available. " "Try installing the '%(pkg)s' package.", mod="simplejson", pkg="python-simplejson")) baseurl = "https://www.googleapis.com/language/translate/v2" baseparams = (("key", self.apikey), ("source", self.lang_in), ("target", self.lang_out), ("target","json")) texts_tr = [] for text in texts: params = baseparams + (("q", text.encode("utf8")),) parfmt = "&".join(["%s=%s" % (p, urllib.quote_plus(v)) for p, v in params]) execurl = "%s?%s" % (baseurl, parfmt) try: res = simplejson.load(urllib.FancyURLopener().open(execurl)) text_tr = unicode(res["data"]["translations"][0]["translatedText"]) except: text_tr = u"" texts_tr.append(text_tr) return texts_tr # ---------------------------------------- # Collect defined translation services by name. _known_transervs = {} def _init (): tspref = "Translator_" for locvar, locval in globals().items(): if locvar.startswith(tspref): _known_transervs[locvar[len(tspref):]] = locval _init() if __name__ == '__main__': exit_on_exception(main) diff --git a/scripts/porewrap.py b/scripts/porewrap.py index 0bbe56b7..1ae9005a 100755 --- a/scripts/porewrap.py +++ b/scripts/porewrap.py @@ -1,90 +1,90 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- """ Rewrap message strings in PO files. Documented in C{doc/user/misctools.docbook#sec-mirewrap}. 
@author: Chusslove Illich (Часлав Илић) @license: GPLv3 """ import locale import os import sys try: import fallback_import_paths except: pass from pology import version, _, n_ from pology.catalog import Catalog from pology.colors import ColorOptionParser import pology.config as pology_config from pology.fsops import collect_paths_cmdline, collect_catalogs from pology.fsops import exit_on_exception from pology.report import report, error from pology.stdcmdopt import add_cmdopt_filesfrom, add_cmdopt_wrapping from pology.wrap import select_field_wrapping def main (): locale.setlocale(locale.LC_ALL, "") # Get defaults for command line options from global config. cfgsec = pology_config.section("porewrap") # Setup options and parse the command line. usage = _("@info command usage", "%(cmd)s [options] POFILE...", cmd="%prog") desc = _("@info command description", "Rewrap message strings in PO files.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2007, 2008, 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=False, help=_("@info command line option description", "More detailed progress information.")) add_cmdopt_wrapping(opars) add_cmdopt_filesfrom(opars) (op, fargs) = opars.parse_args() if len(fargs) < 1 and not op.files_from: error(_("@info", "No input files given.")) # Could use some speedup. try: import psyco psyco.full() except ImportError: pass # Assemble list of files. fnames = collect_paths_cmdline(rawpaths=fargs, filesfrom=op.files_from, respathf=collect_catalogs, abort=True) # Rewrap all catalogs. for fname in fnames: if op.verbose: report(_("@info:progress", "Rewrapping: %(file)s", file=fname)) cat = Catalog(fname, monitored=False) wrapping = select_field_wrapping(cfgsec, cat, op) cat.set_wrapping(wrapping) cat.sync(force=True) if __name__ == '__main__': exit_on_exception(main) diff --git a/scripts/poselfmerge.py b/scripts/poselfmerge.py index b2f44b9e..34885aac 100755 --- a/scripts/poselfmerge.py +++ b/scripts/poselfmerge.py @@ -1,191 +1,191 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- """ Merge PO file with itself or compendium, to produce fuzzy matches on similar messages. Documented in C{doc/user/misctools.docbook#sec-miselfmerge}. @author: Chusslove Illich (Часлав Илић) @license: GPLv3 """ import locale import os import shutil import sys try: import fallback_import_paths except: pass from pology import version, _, n_ from pology.catalog import Catalog from pology.message import MessageUnsafe from pology.colors import ColorOptionParser import pology.config as pology_config from pology.fsops import collect_paths_cmdline, collect_catalogs from pology.fsops import exit_on_exception from pology.merge import merge_pofile from pology.report import report, error from pology.stdcmdopt import add_cmdopt_filesfrom, add_cmdopt_wrapping from pology.wrap import select_field_wrapping def main (): locale.setlocale(locale.LC_ALL, "") # Get defaults for command line options from global config. cfgsec = pology_config.section("poselfmerge") def_minwnex = cfgsec.integer("min-words-exact", 0) def_minasfz = cfgsec.real("min-adjsim-fuzzy", 0.0) def_fuzzex = cfgsec.boolean("fuzzy-exact", False) def_refuzz = cfgsec.boolean("rebase-fuzzies", False) # Setup options and parse the command line. 
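
The whole of porewrap's per-file work above is three calls; as a standalone recipe (illustrative policy value shown, where the script derives it from configuration and the catalog itself):

from pology.catalog import Catalog

cat = Catalog("some.po", monitored=False)  # no need to track changes
cat.set_wrapping(("basic",))               # illustrative policy value
cat.sync(force=True)                       # force: rewrite even if unmodified
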
usage = _("@info command usage",
        "%(cmd)s [options] POFILE...",
        cmd="%prog")
    desc = _("@info command description",
        "Merge PO file with itself or compendium, "
        "to produce fuzzy matches on similar messages.")
    ver = _("@info command version",
        u"%(cmd)s (Pology) %(version)s\n"
        u"Copyright © 2009, 2010 "
        u"Chusslove Illich (Часлав Илић) <%(email)s>",
        cmd="%prog", version=version(), email="caslav.ilic@gmx.net")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-A", "--min-adjsim-fuzzy",
        metavar=_("@info command line value placeholder", "RATIO"),
        action="store", dest="min_adjsim_fuzzy", default=def_minasfz,
        help=_("@info command line option description",
               "On fuzzy matches, the minimum adjusted similarity "
               "to accept the match, or else the message is left untranslated. "
               "Range is 0.0-1.0, where 0 means always to accept the match, "
               "and 1 never to accept; a practical range is 0.6-0.8."))
    opars.add_option(
        "-b", "--rebase-fuzzies",
        action="store_true", dest="rebase_fuzzies", default=def_refuzz,
        help=_("@info command line option description",
               "Before merging, clear those fuzzy messages whose predecessor "
               "(determined by previous fields) is still in the catalog."))
    opars.add_option(
        "-C", "--compendium",
        metavar=_("@info command line value placeholder", "POFILE"),
        action="append", dest="compendiums", default=[],
        help=_("@info command line option description",
               "Catalog with existing translations, to additionally use for "
               "direct and fuzzy matches. Can be repeated."))
    opars.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose", default=False,
        help=_("@info command line option description",
               "More detailed progress information."))
    opars.add_option(
        "-W", "--min-words-exact",
        metavar=_("@info command line value placeholder", "NUMBER"),
        action="store", dest="min_words_exact", default=def_minwnex,
        help=_("@info command line option description",
               "When using compendium, in case of exact match, "
               "minimum number of words that original text must have "
               "to accept translation without making it fuzzy. "
               "Zero means to always accept an exact match."))
    opars.add_option(
        "-x", "--fuzzy-exact",
        action="store_true", dest="fuzzy_exact", default=def_fuzzex,
        help=_("@info command line option description",
               "When using compendium, make all exact matches fuzzy."))
    add_cmdopt_wrapping(opars)
    add_cmdopt_filesfrom(opars)

    (op, fargs) = opars.parse_args()

    if len(fargs) < 1 and not op.files_from:
        error(_("@info", "No input files given."))

    # Could use some speedup.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    # Convert non-string options to needed types.
    try:
        op.min_words_exact = int(op.min_words_exact)
    except ValueError:
        error(_("@info",
                "Value to option %(opt)s must be an integer number, "
                "given '%(val)s' instead.",
                opt="--min-words-exact", val=op.min_words_exact))
    try:
        op.min_adjsim_fuzzy = float(op.min_adjsim_fuzzy)
    except ValueError:
        error(_("@info",
                "Value to option %(opt)s must be a real number, "
                "given '%(val)s' instead.",
                opt="--min-adjsim-fuzzy", val=op.min_adjsim_fuzzy))

    # Assemble list of files.
    fnames = collect_paths_cmdline(rawpaths=fargs,
                                   filesfrom=op.files_from,
                                   respathf=collect_catalogs,
                                   abort=True)

    # Self-merge all catalogs.
for fname in fnames: if op.verbose: report(_("@info:progress", "Self-merging: %(file)s", file=fname)) self_merge_pofile(fname, op.compendiums, op.fuzzy_exact, op.min_words_exact, op.min_adjsim_fuzzy, op.rebase_fuzzies, cfgsec, op) def self_merge_pofile (catpath, compendiums=[], fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False, cfgsec=None, cmlopt=None): # Create temporary files for merging. ext = ".tmp-selfmerge" catpath_mod = catpath + ext if ".po" in catpath: potpath = catpath.replace(".po", ".pot") + ext else: potpath = catpath + ".pot" + ext shutil.copyfile(catpath, catpath_mod) shutil.copyfile(catpath, potpath) # Open catalog for pre-processing. cat = Catalog(potpath, monitored=False) # Decide wrapping policy. wrapping = select_field_wrapping(cfgsec, cat, cmlopt) # From the dummy template, clean all active messages and # remove all obsolete messages. for msg in cat: if msg.obsolete: cat.remove_on_sync(msg) else: msg.clear() cat.sync() # Merge with dummy template. merge_pofile(catpath_mod, potpath, update=True, wrapping=wrapping, cmppaths=compendiums, fuzzex=fuzzex, minwnex=minwnex, minasfz=minasfz, refuzzy=refuzzy, abort=True) # Overwrite original with temporary catalog. shutil.move(catpath_mod, catpath) os.unlink(potpath) if __name__ == '__main__': exit_on_exception(main) diff --git a/scripts/posieve.py b/scripts/posieve.py index 084ac5e5..6c1afd84 100755 --- a/scripts/posieve.py +++ b/scripts/posieve.py @@ -1,616 +1,616 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- """ Sieve messages in collections of PO files. Reference documentation in C{doc/user/sieving.docbook}. @author: Chusslove Illich (Часлав Илић) @license: GPLv3 """ try: import fallback_import_paths except: pass import glob import imp import locale import os import re import sys from pology import datadir, version, _, n_, t_ from pology.catalog import Catalog, CatalogSyntaxError from pology.colors import ColorOptionParser, set_coloring_globals import pology.config as pology_config from pology.escape import escape_sh from pology.fsops import str_to_unicode, unicode_to_str from pology.fsops import collect_catalogs, collect_system from pology.fsops import build_path_selector, collect_paths_from_file from pology.fsops import collect_paths_cmdline from pology.fsops import exit_on_exception from pology.msgreport import report_on_msg, warning_on_msg, error_on_msg from pology.report import error, warning, report, encwrite from pology.report import init_file_progress from pology.report import list_options from pology.report import format_item_list from pology.stdcmdopt import add_cmdopt_filesfrom, add_cmdopt_incexc from pology.stdcmdopt import add_cmdopt_colors from pology.subcmd import ParamParser from pology.sieve import SieveMessageError, SieveCatalogError def main (): locale.setlocale(locale.LC_ALL, "") # Get defaults for command line options from global config. cfgsec = pology_config.section("posieve") def_do_skip = cfgsec.boolean("skip-on-error", True) def_msgfmt_check = cfgsec.boolean("msgfmt-check", False) def_skip_obsolete = cfgsec.boolean("skip-obsolete", False) # Setup options and parse the command line. usage = _("@info command usage", "%(cmd)s [OPTIONS] SIEVE [POPATHS...]", cmd="%prog") desc = _("@info command description", "Apply sieves to PO paths, which may be either single PO files or " "directories to search recursively for PO files. " "Some of the sieves only examine PO files, while others " "modify them as well. 
" "The first non-option argument is the sieve name; " "a list of several comma-separated sieves can be given too.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2007, 2008, 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-a", "--announce-entry", action="store_true", dest="announce_entry", default=False, help=_("@info command line option description", "Announce that header or message is just about to be sieved.")) opars.add_option( "-b", "--skip-obsolete", action="store_true", dest="skip_obsolete", default=def_skip_obsolete, help=_("@info command line option description", "Do not sieve obsolete messages.")) opars.add_option( "-c", "--msgfmt-check", action="store_true", dest="msgfmt_check", default=def_msgfmt_check, help=_("@info command line option description", "Check catalogs by %(cmd)s and skip those which do not pass.", cmd="msgfmt -c")) opars.add_option( "-u", "--single-entry", metavar=_("@info command line value placeholder", "ENTRY_NUMBER"), action="store", dest="single_entry", default=0, help=_("@info command line option description", "Only perform the check on this ENTRY_NUMBER.")) opars.add_option( "--force-sync", action="store_true", dest="force_sync", default=False, help=_("@info command line option description", "Force rewriting of all messages, whether modified or not.")) opars.add_option( "-H", "--help-sieves", action="store_true", dest="help_sieves", default=False, help=_("@info command line option description", "Show help for applied sieves.")) opars.add_option( "--issued-params", action="store_true", dest="issued_params", default=False, help=_("@info command line option description", "Show all issued sieve parameters " "(from command line and user configuration).")) opars.add_option( "-l", "--list-sieves", action="store_true", dest="list_sieves", default=False, help=_("@info command line option description", "List available internal sieves.")) opars.add_option( "--list-options", action="store_true", dest="list_options", default=False, help=_("@info command line option description", "List the names of available options.")) opars.add_option( "--list-sieve-names", action="store_true", dest="list_sieve_names", default=False, help=_("@info command line option description", "List the names of available internal sieves.")) opars.add_option( "--list-sieve-params", action="store_true", dest="list_sieve_params", default=False, help=_("@info command line option description", "List the parameters known to issued sieves.")) opars.add_option( "-m", "--output-modified", metavar=_("@info command line value placeholder", "FILE"), action="store", dest="output_modified", default=None, help=_("@info command line option description", "Output names of modified files into FILE.")) opars.add_option( "--no-skip", action="store_false", dest="do_skip", default=def_do_skip, help=_("@info command line option description", "Do not try to skip catalogs which signal errors.")) opars.add_option( "--no-sync", action="store_false", dest="do_sync", default=True, help=_("@info command line option description", "Do not write any modifications to catalogs.")) opars.add_option( "-q", "--quiet", action="store_true", dest="quiet", default=False, help=_("@info command line option description", "Do not display any progress info " "(does not influence sieves themselves).")) opars.add_option( "-s", metavar=_("@info command 
line value placeholder", "NAME[:VALUE]"), action="append", dest="sieve_params", default=[], help=_("@info command line option description", "Pass a parameter to sieves.")) opars.add_option( "-S", metavar=_("@info command line value placeholder", "NAME[:VALUE]"), action="append", dest="sieve_no_params", default=[], help=_("@info command line option description", "Remove a parameter to sieves " "(e.g. if it was issued through user configuration).")) opars.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=False, help=_("@info command line option description", "Output more detailed progress information.")) add_cmdopt_filesfrom(opars) add_cmdopt_incexc(opars) add_cmdopt_colors(opars) (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:])) if op.list_options: report(list_options(opars)) sys.exit(0) if len(free_args) < 1 and not (op.list_sieves or op.list_sieve_names): error(_("@info", "No sieve to apply given.")) op.raw_sieves = [] op.raw_paths = [] if len(free_args) > 2 and op.single_entry != 0: error(_("@info", "With single entry mode, you can only give one input file.")) if len(free_args) >= 1: op.raw_sieves = free_args[0] op.raw_paths = free_args[1:] # Could use some speedup. try: import psyco psyco.full() except ImportError: pass set_coloring_globals(ctype=op.coloring_type, outdep=(not op.raw_colors)) # Dummy-set all internal sieves as requested if sieve listing required. sieves_requested = [] if op.list_sieves or op.list_sieve_names: # Global sieves. modpaths = glob.glob(os.path.join(datadir(), "sieve", "[a-z]*.py")) modpaths.sort() for modpath in modpaths: sname = os.path.basename(modpath)[:-3] # minus .py sname = sname.replace("_", "-") sieves_requested.append(sname) # Language-specific sieves. modpaths = glob.glob(os.path.join(datadir(), "lang", "*", "sieve", "[a-z]*.py")) modpaths.sort() for modpath in modpaths: sname = os.path.basename(modpath)[:-3] # minus .py sname = sname.replace("_", "-") lang = os.path.basename(os.path.dirname(os.path.dirname(modpath))) sieves_requested.append(lang + ":" + sname) # No need to load and setup sieves if only listing sieve names requested. if op.list_sieve_names: report("\n".join(sieves_requested)) sys.exit(0) # Load sieve modules from supplied names in the command line. if not sieves_requested: sieves_requested = op.raw_sieves.split(",") sieve_modules = [] for sieve_name in sieves_requested: # Resolve sieve file. if not sieve_name.endswith(".py"): # One of internal sieves. if ":" in sieve_name: # Language-specific internal sieve. lang, name = sieve_name.split(":") sieve_path_base = os.path.join("lang", lang, "sieve", name) else: sieve_path_base = os.path.join("sieve", sieve_name) sieve_path_base = sieve_path_base.replace("-", "_") + ".py" sieve_path = os.path.join(datadir(), sieve_path_base) else: # Sieve name is its path. sieve_path = sieve_name try: sieve_file = open(unicode_to_str(sieve_path)) # ...unicode_to_str because of exec below. except IOError: error(_("@info", "Cannot load sieve '%(file)s'.", file=sieve_path)) # Load file into new module. sieve_mod_name = "sieve_" + str(len(sieve_modules)) sieve_mod = imp.new_module(sieve_mod_name) exec sieve_file in sieve_mod.__dict__ sieve_file.close() sys.modules[sieve_mod_name] = sieve_mod # to avoid garbage collection sieve_modules.append((sieve_name, sieve_mod)) if not hasattr(sieve_mod, "Sieve"): error(_("@info", "Module '%(file)s' does not define %(classname)s class.", file=sieve_path, classname="Sieve")) # Setup sieves (description, known parameters...) 
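
The loader above compiles each sieve file into a fresh module object and pins it in sys.modules so it outlives the loop. The same scheme reduced to a helper (hypothetical name; execfile is the Python 2 equivalent of the exec statement over an open file used above):

import imp
import sys

def load_sieve_module (path, index):
    name = "sieve_%d" % index
    mod = imp.new_module(name)
    execfile(path, mod.__dict__)   # run the sieve file inside the module
    sys.modules[name] = mod        # keep a reference, as posieve does
    return mod
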
pp = ParamParser() snames = [] for name, mod in sieve_modules: scview = pp.add_subcmd(name) if hasattr(mod, "setup_sieve"): mod.setup_sieve(scview) snames.append(name) # If info on sieves requested, report and exit. if op.list_sieves: report(_("@info", "Available internal sieves:")) report(pp.listcmd(snames)) sys.exit(0) elif op.list_sieve_params: params = set() for scview in pp.cmdviews(): params.update(scview.params(addcol=True)) report("\n".join(sorted(params))) sys.exit(0) elif op.help_sieves: report(_("@info", "Help for sieves:")) report("") report(pp.help(snames)) sys.exit(0) # Prepare sieve parameters for parsing. sieve_params = list(op.sieve_params) # - append parameters according to configuration sieve_params.extend(read_config_params(pp.cmdviews(), sieve_params)) # - remove parameters according to command line if op.sieve_no_params: sieve_params_mod = [] for parspec in sieve_params: if parspec.split(":", 1)[0] not in op.sieve_no_params: sieve_params_mod.append(parspec) sieve_params = sieve_params_mod # If assembly of issued parameters requested, report and exit. if op.issued_params: escparams = [] for parspec in sieve_params: if ":" in parspec: param, value = parspec.split(":", 1) escparam = "%s:%s" % (param, escape_sh(value)) else: escparam = parspec escparams.append(escparam) fmtparams = " ".join(["-s%s" % x for x in sorted(escparams)]) if fmtparams: report(fmtparams) sys.exit(0) # Parse sieve parameters. sparams, nacc_params = pp.parse(sieve_params, snames) if nacc_params: error(_("@info", "Parameters not accepted by any of the issued subcommands: " "%(paramlist)s.", paramlist=format_item_list(nacc_params))) # ======================================== # FIXME: Think of something less ugly. # Add as special parameter to each sieve: # - root paths from which the catalogs are collected # - whether destination independent coloring is in effect # - test function for catalog selection root_paths = [] if op.raw_paths: root_paths.extend(op.raw_paths) if op.files_from: for ffpath in op.files_from: root_paths.extend(collect_paths_from_file(ffpath)) if not op.raw_paths and not op.files_from: root_paths = ["."] is_cat_included = build_path_selector(incnames=op.include_names, incpaths=op.include_paths, excnames=op.exclude_names, excpaths=op.exclude_paths) for p in sparams.values(): p.root_paths = root_paths p.raw_colors = op.raw_colors p.is_cat_included = is_cat_included # ======================================== # Create sieves. sieves = [] for name, mod in sieve_modules: sieves.append(mod.Sieve(sparams[name])) # Get the message monitoring indicator from the sieves. # Monitor unless all sieves have requested otherwise. use_monitored = False for sieve in sieves: if getattr(sieve, "caller_monitored", True): use_monitored = True break if op.verbose and not use_monitored: report(_("@info:progress", "--> Not monitoring messages.")) # Get the sync indicator from the sieves. # Sync unless all sieves have requested otherwise, # and unless syncing is disabled globally in command line. do_sync = False for sieve in sieves: if getattr(sieve, "caller_sync", True): do_sync = True break if not op.do_sync: do_sync = False if op.verbose and not do_sync: report(_("@info:progress", "--> Not syncing after sieving.")) # Open in header-only mode if no sieve has message processor. # Categorize sieves by the presence of message/header processors.
use_headonly = True header_sieves = [] header_sieves_last = [] message_sieves = [] for sieve in sieves: if hasattr(sieve, "process"): use_headonly = False message_sieves.append(sieve) if hasattr(sieve, "process_header"): header_sieves.append(sieve) if hasattr(sieve, "process_header_last"): header_sieves_last.append(sieve) if op.verbose and use_headonly: report(_("@info:progress", "--> Opening catalogs in header-only mode.")) # Collect catalog paths. fnames = collect_paths_cmdline(rawpaths=op.raw_paths, incnames=op.include_names, incpaths=op.include_paths, excnames=op.exclude_names, excpaths=op.exclude_paths, filesfrom=op.files_from, elsecwd=True, respathf=collect_catalogs, abort=True) if op.do_skip: errwarn = warning errwarn_on_msg = warning_on_msg else: errwarn = error errwarn_on_msg = error_on_msg # Prepare inline progress indicator. if not op.quiet: update_progress = init_file_progress(fnames, addfmt=t_("@info:progress", "Sieving: %(file)s")) # Sieve catalogs. modified_files = [] for fname in fnames: if op.verbose: report(_("@info:progress", "Sieving %(file)s...", file=fname)) elif not op.quiet: update_progress(fname) if op.msgfmt_check: d1, oerr, ret = collect_system(["msgfmt", "-o", "/dev/null", "-c", fname]) if ret != 0: oerr = oerr.strip() errwarn(_("@info:progress", "%(file)s: %(cmd)s check failed:\n" "%(msg)s", file=fname, cmd="msgfmt -c", msg=oerr)) warning(_("@info:progress", "Skipping catalog due to syntax check failure.")) continue try: cat = Catalog(fname, monitored=use_monitored, headonly=use_headonly, single_entry=int(op.single_entry)) except CatalogSyntaxError, e: errwarn(_("@info:progress", "%(file)s: Parsing failed: %(msg)s", file=fname, msg=e)) warning(_("@info:progress", "Skipping catalog due to parsing failure.")) continue skip = False # First run all header sieves. if header_sieves and op.announce_entry: report(_("@info:progress", "Sieving header of %(file)s...", file=fname)) for sieve in header_sieves: try: ret = sieve.process_header(cat.header, cat) except SieveCatalogError, e: errwarn(_("@info:progress", "%(file)s:header: Sieving failed: %(msg)s", file=fname, msg=e)) skip = True break if ret not in (None, 0): break if skip: warning(_("@info:progress", "Skipping catalog due to header sieving failure.")) continue # Then run all message sieves on each message, # unless processing only the header. if not use_headonly: for msg in cat: if op.skip_obsolete and msg.obsolete: continue if not op.quiet: update_progress(fname) if op.announce_entry: report(_("@info:progress", "Sieving %(file)s:%(line)d(#%(entry)d)...", file=fname, line=msg.refline, entry=msg.refentry)) for sieve in message_sieves: try: ret = sieve.process(msg, cat) except SieveMessageError, e: errwarn_on_msg(_("@info:progress", "Sieving failed: %(msg)s", msg=e), msg, cat) break except SieveCatalogError, e: errwarn_on_msg(_("@info:progress", "Sieving failed: %(msg)s", msg=e), msg, cat) skip = True break if ret not in (None, 0): break if skip: break if skip: warning(_("@info:progress", "Skipping catalog due to message sieving failure.")) continue # Finally run all header-last sieves. 
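# ----------------------------------------------------------------------
# Editor's note (sketch): the loops above treat a return value other
# than None or 0 from process()/process_header() as "stop running the
# remaining sieves on this entry", without skipping the catalog. E.g.:
#
#   class Sieve (object):
#       def process_header (self, hdr, cat):
#           if cat.name.startswith("x-test"):
#               return 1  # remaining header sieves are not run
# ----------------------------------------------------------------------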
if header_sieves_last and op.announce_entry: report(_("@info:progress", "Sieving header (after messages) in %(file)s...", file=fname)) for sieve in header_sieves_last: try: ret = sieve.process_header_last(cat.header, cat) except SieveCatalogError, e: errwarn(_("@info:progress", "%(file)s:header: Sieving (after messages) " "failed: %(msg)s", file=fname, msg=e)) skip = True break if ret not in (None, 0): break if skip: warning(_("@info:progress", "Skipping catalog due to header sieving " "(after messages) failure.")) continue if do_sync and cat.sync(op.force_sync): if op.verbose: report(_("@info:progress leading ! is a shorthand " "state indicator", "! (MODIFIED) %(file)s", file=fname)) elif not op.quiet: report(_("@info:progress leading ! is a shorthand " "state indicator", "! %(file)s", file=fname)) modified_files.append(fname) if not op.quiet: update_progress() # clear last progress line, if any for sieve in sieves: if hasattr(sieve, "finalize"): try: sieve.finalize() except SieveCatalogError, e: warning(_("@info:progress", "Finalization failed: %(msg)s", msg=e)) if op.output_modified: ofh = open(op.output_modified, "w") ofh.write("\n".join(modified_files) + "\n") ofh.close() def read_config_params (scviews, cmdline_parspecs): # Collect parameters defined in the config. cfgsec = pology_config.section("posieve") pref = "param-" config_params = [] for field in cfgsec.fields(): if field.startswith(pref): parspec = field[len(pref):] only_sieves = None inverted = False if "/" in parspec: param, svspec = parspec.split("/", 1) if svspec.startswith("~"): inverted = True svspec = svspec[1:] only_sieves = set(svspec.split(",")) else: param = parspec if "." in param: param, d1 = param.split(".", 1) config_params.append((field, param, only_sieves, inverted)) if not config_params: return [] # Collect parameters known to issued sieves and issued in command line. sieves = set([x.name() for x in scviews]) acc_raw_params = set(sum([x.params(addcol=True) for x in scviews], [])) acc_params = set([x.rstrip(":") for x in acc_raw_params]) acc_flag_params = set([x for x in acc_raw_params if not x.endswith(":")]) cmd_params = set([x.split(":", 1)[0] for x in cmdline_parspecs]) # Select parameters based on issued sieves.
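# ----------------------------------------------------------------------
# Editor's note (sketch): the field format parsed above, shown as a
# hypothetical [posieve] section of the user configuration. A field
# param-NAME issues NAME to all sieves that accept it, /SIEVE1,SIEVE2
# restricts it to the named sieves, /~... inverts the restriction, and
# an arbitrary .suffix lets the same parameter appear in several
# fields. The selection below then matches these fields against the
# issued sieves and the command line:
#
#   [posieve]
#   param-transl/stats = yes
#   param-accel/~check-rules = &
#   param-lang.a/stats = sr
#   param-lang.b/check-spell = sr
# ----------------------------------------------------------------------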
sel_params = [] for field, param, only_sieves, inverted in config_params: if param in acc_params and param not in cmd_params: if only_sieves is not None: overlap = bool(sieves.intersection(only_sieves)) add_param = overlap if not inverted else not overlap else: add_param = True if add_param: if param in acc_flag_params: if cfgsec.boolean(field): sel_params.append(param) else: sel_params.append("%s:%s" % (param, cfgsec.string(field))) return sel_params if __name__ == '__main__': exit_on_exception(main) diff --git a/scripts/posummit.py b/scripts/posummit.py index e711eb81..819a8256 100755 --- a/scripts/posummit.py +++ b/scripts/posummit.py @@ -1,2930 +1,2930 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: UTF-8 -*- import copy from difflib import SequenceMatcher import filecmp import hashlib import imp import locale import os import re import shutil import sys import time try: import fallback_import_paths except: pass from pology import version, _, n_, t_, PologyError from pology.ascript import collect_ascription_associations from pology.ascript import collect_ascription_history from pology.ascript import make_ascription_selector from pology.catalog import Catalog from pology.header import Header, format_datetime from pology.message import Message, MessageUnsafe from pology.colors import ColorOptionParser from pology.fsops import str_to_unicode, unicode_to_str from pology.fsops import mkdirpath, assert_system, collect_system from pology.fsops import getucwd, join_ncwd from pology.fsops import collect_paths_cmdline, build_path_selector from pology.fsops import exit_on_exception from pology.merge import merge_pofile from pology.monitored import Monpair, Monlist from pology.msgreport import report_on_msg from pology.report import report, error, warning, format_item_list from pology.report import init_file_progress from pology.stdcmdopt import add_cmdopt_incexc, add_cmdopt_filesfrom from pology.vcs import make_vcs from pology.wrap import select_field_wrapping SUMMIT_ID = "+" # must not start with word-character (\w) def main (): locale.setlocale(locale.LC_ALL, "") # Setup options and parse the command line. 
usage = _("@info command usage", "\n" " %(cmd)s [OPTIONS] CFGFILE LANG OPMODE [PARTIAL...]\n" " (if there is no '%(cfgfile)s' file in a parent directory)\n" " %(cmd)s [OPTIONS] OPMODE [PARTIAL...]\n" " (if there is a '%(cfgfile)s' file in a parent directory)", cmd="%prog", cfgfile="summit-config") desc = _("@info command description", "Translate PO files spread across different branches " "in a unified fashion.") ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n" u"Copyright © 2007, 2008, 2009, 2010 " u"Chusslove Illich (Часлав Илић) <%(email)s>", cmd="%prog", version=version(), email="caslav.ilic@gmx.net") opars = ColorOptionParser(usage=usage, description=desc, version=ver) opars.add_option( "-a", "--asc-filter", action="store", dest="asc_filter", default=None, help=_("@info command line option description", "Apply a non-default ascription filter on scatter.")) opars.add_option( "--create", action="store_true", dest="create", default=False, help=_("@info command line option description", "Allow creation of new summit catalogs.")) opars.add_option( "--force", action="store_true", dest="force", default=False, help=_("@info command line option description", "Force some operations that are normally not advised.")) opars.add_option( "-q", "--quiet", action="store_true", dest="quiet", default=False, help=_("@info command line option description", "Output less detailed progress info.")) opars.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=False, help=_("@info command line option description", "Output more detailed progress info")) add_cmdopt_filesfrom(opars) add_cmdopt_incexc(opars) options, free_args = opars.parse_args(str_to_unicode(sys.argv[1:])) # Look for the config file through parent directories. parent = getucwd() cfgpath = None while True: for cfgname in ("summit-config",): cfgpath1 = os.path.join(parent, cfgname) if os.path.isfile(cfgpath1): cfgpath = cfgpath1 break if cfgpath: break pparent = parent parent = os.path.dirname(parent) if parent == pparent: break # If config file not found, expect it and language as arguments. if not cfgpath: if len(free_args) < 1: error(_("@info", "Summit configuration file neither found " "as '%(cfgfile)s' in parent directories, " "nor given in command line.", cfgfile="summit-config")) cfgpath = free_args.pop(0) if not os.path.isfile(cfgpath): error(_("@info", "Summit configuration file '%(file)s' does not exist.", file=cfgpath)) if len(free_args) < 1: error(_("@info", "Language code not given.")) lang = free_args.pop(0) else: lang = None # ...will be read from config file. if len(free_args) < 1: error(_("@info", "Operation mode not given.")) opmodes = free_args.pop(0).split(",") opmodes_uniq = [] for opmode in opmodes: if opmode not in opmodes_uniq: if opmode not in ("gather", "scatter", "merge", "deps"): error(_("@info", "Unknown operation mode '%(mode)s'.", mode=opmode)) opmodes_uniq.append(opmode) opmodes = opmodes_uniq # Could use some speedup. try: import psyco psyco.full() except ImportError: pass # Read project definition. project = Project(lang, opmodes, options) project.include(cfgpath) # In summit-over-templates mode, determine if templates are dynamic. project.templates_dynamic = ( project.over_templates and not project.summit.get("topdir_templates")) # If config file was found in parent directories, # it should have defined the language itself. # Otherwise, its language is set to language given in command line. 
if not lang: if not project.lang: error(_("@info", "Language code not set in configuration file.")) lang = project.lang else: project.lang = lang # In summit-over-templates mode, derive special project data # for implicitly gathering templates on merge. if project.templates_dynamic and "merge" in project.opmodes: project.toptions = copy.copy(options) project.toptions.quiet = True project.tproject = Project(project.templates_lang, ["gather"], project.toptions) project.tproject.include(cfgpath) project.tproject.templates_dynamic = False project.tproject.summit_version_control = "none" project.tproject.summit_wrap = False # performance project.tproject.summit_fine_wrap = False # performance tpd = project.tproject.summit.get("topdir_templates") if tpd is None: # FIXME: Portability. tpd = "/tmp/summit-templates-%d" % os.getpid() project.tproject.summit["topdir"] = tpd for tb in project.tproject.branches: tbpd = tb.get("topdir_templates") if tbpd is not None: tb["topdir"] = tbpd project.tproject.lang = project.templates_lang project.tproject = derive_project_data(project.tproject, project.toptions, project.summit["topdir"]) project.summit["topdir_templates"] = tpd # Explicit gathering in summit-over-templates mode # may be useful to check if gathering works. # Make some adjustments for this to go smoothly. if ( project.templates_dynamic and "gather" in project.opmodes and project.lang == project.templates_lang ): options.create = True project.summit["topdir"] = project.summit["topdir_templates"] project.summit_version_control = "none" # Derive project data. project = derive_project_data(project, options) # Collect partial processing specs and inclusion-exclusion test. specargs, ffself = collect_paths_cmdline(rawpaths=free_args, filesfrom=options.files_from, getsel=True, abort=True) options.partspecs, options.partbids = collect_partspecs(project, specargs) if not options.files_from: # If there was no from-file input and no partial processing specs # were collected, indicate operation on the whole summit. if not options.partspecs: options.partspecs = None if not options.partbids: options.partbids = None cmdself = build_path_selector(incnames=options.include_names, incpaths=options.include_paths, excnames=options.exclude_names, excpaths=options.exclude_paths) options.selcatf = lambda x: cmdself(x) and ffself(x) # Invoke the appropriate operations on collected bundles. 
for opmode in opmodes: if options.verbose: report(_("@info:progress", "-----> Processing mode: %(mode)s", mode=opmode)) if opmode == "gather": summit_gather(project, options) elif opmode == "scatter": summit_scatter(project, options) elif opmode == "merge": summit_merge(project, options) elif opmode == "deps": summit_deps(project, options) class Project (object): def __init__ (self, lang, opmodes, options): self.__dict__.update({ "lang" : lang, "opmodes" : opmodes, "options" : options, "summit" : "", "branches" : [], "mappings" : [], "subdir_mappings" : [], "subdir_precedence" : [], "over_templates" : False, "templates_lang" : "templates", "summit_wrap" : False, "summit_fine_wrap" : True, "summit_fuzzy_merging" : True, "branches_wrap" : True, "branches_fine_wrap" : True, "branches_fuzzy_merging" : True, "version_control" : "", "summit_version_control" : "", "branches_version_control" : "", "hook_on_scatter_msgstr" : [], "hook_on_scatter_msg" : [], "hook_on_scatter_cat" : [], "hook_on_scatter_file" : [], "hook_on_scatter_branch": [], "hook_on_gather_msg" : [], "hook_on_gather_msg_branch" : [], "hook_on_gather_cat" : [], "hook_on_gather_cat_branch" : [], "hook_on_gather_file" : [], "hook_on_gather_file_branch" : [], "hook_on_merge_msg" : [], "hook_on_merge_head" : [], "hook_on_merge_cat" : [], "hook_on_merge_file" : [], "header_propagate_fields" : [], "header_skip_fields_on_scatter" : [], "vivify_on_merge" : False, "vivify_w_translator" : "Simulacrum", "vivify_w_langteam" : "Nevernessian", "vivify_w_language" : "", "vivify_w_charset" : "UTF-8", "vivify_w_plurals" : "", "compendium_on_merge" : "", "compendium_fuzzy_exact" : False, "compendium_min_words_exact" : 0, "merge_min_adjsim_fuzzy" : 0.0, "merge_rebase_fuzzy" : False, "scatter_min_completeness" : 0.0, "scatter_acc_completeness" : 0.0, "ascription_filters" : [], "ascription_history_filter" : None, }) self.__dict__["locked"] = False self.inclusion_trail = [] def __setattr__ (self, att, val): # TODO: Do extensive checks. if self.locked and att not in self.__dict__: error(_("@info", "Unknown summit configuration field '%(field)s'.", field=att)) self.__dict__[att] = val def relpath (self, path): rootdir = os.path.dirname(self.inclusion_trail[-1]) if not os.path.isabs(path): path = join_ncwd(rootdir, path) return path # FIXME: Temporary for backward compatibility, remove at some point. def resolve_path_rooted (self, path): return self.relpath(path) def include (self, path): path = os.path.abspath(path) if path in self.inclusion_trail: error(_("@info", "Circular inclusion of '%(file)s' attempted " "in summit configuration.", file=path)) self.inclusion_trail.append(path) self.locked = True exec open(unicode_to_str(path)) in {"S" : self} self.locked = False self.inclusion_trail.pop() def derive_project_data (project, options, nwgrefpath=None): p = project # shortcut # Create summit object from summit dictionary. class Summit: pass s = Summit() sd = p.summit s.id = SUMMIT_ID s.by_lang = False s.topdir = sd.pop("topdir", None) s.topdir_templates = sd.pop("topdir_templates", None) # Assert that there are no misnamed keys in the dictionary. if sd: error(_("@info", "Unknown keys in summit configuration: %(keylist)s.", keylist=format_item_list(sd.keys()))) # Assert that all necessary fields in summit specification exist. if s.topdir is None: error(_("@info", "Top directory not set in summit configuration.")) s.split_path = None # needed only on some checks later p.summit = s # Create branch objects from branch dictionaries. 
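# ----------------------------------------------------------------------
# Editor's sketch (illustrative paths and ids): a small summit-config
# file as consumed by Project.include() below. The file is ordinary
# Python executed with the project object bound to S; the summit is a
# dictionary and branches a list of dictionaries, matching the fields
# popped off in derive_project_data:
#
#   S.lang = "sr"
#   S.over_templates = True
#   S.summit = dict(
#       topdir="summit/po",
#       topdir_templates="summit/pot",
#   )
#   S.branches = [
#       dict(id="stable", topdir="branches/stable/po"),
#       dict(id="trunk", topdir="trunk/po"),
#   ]
#   S.version_control = "svn"
# ----------------------------------------------------------------------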
class Branch: pass branches = [] for bd in p.branches: b = Branch() branches.append(b) b.id = bd.pop("id", None) b.topdir = bd.pop("topdir", None) b.topdir_templates = bd.pop("topdir_templates", None) # If operation is performed on templates and branch template directory # is defined, override plain branch directory with it. if p.lang == p.templates_lang and b.topdir_templates is not None: b.topdir = b.topdir_templates b.by_lang = bd.pop("by_lang", False) if b.by_lang and isinstance(b.by_lang, bool): b.by_lang = project.lang # If separate templates directory is not defined in by-language mode, # set it to same as catalogs directory. if b.by_lang and b.topdir_templates is None: b.topdir_templates = b.topdir b.scatter_create_filter = bd.pop("scatter_create_filter", None) b.skip_version_control = bd.pop("skip_version_control", False) # FIXME: merge_locally retained for backward compatibility, # replace at some point with b.merge = bd.pop("merge", False). b.merge = bd.pop("merge", None) if b.merge is None: b.merge = bd.pop("merge_locally", False) b.split_path, b.join_path = bd.pop("transform_path", (None, None)) b.insert_nosim = bd.pop("insert_nosim", False) # Assemble include-exclude functions. includes = bd.pop("includes", []) excludes = bd.pop("excludes", []) def regex_to_func (rxstr): try: rx = re.compile(rxstr, re.U) except: error(_("@info", "Invalid regular expression '%(regex)s' " "in include-exclude specification " "of branch '%(branch)s'.", branch=b.id, regex=rxstr)) return lambda x: bool(rx.search(x)) def chain_tests (tests): testfs = [] for test in tests: if isinstance(test, basestring): testfs.append(regex_to_func(test)) elif callable(test): testfs.append(test) else: error(_("@info", "Invalid test type '%(type)s' " "in include-exclude specification " "of branch '%(branch)s'.", branch=b.id, type=type(test))) return lambda x: reduce(lambda s, y: s or y(x), testfs, False) if includes: includef = chain_tests(includes) if excludes: excludef = chain_tests(excludes) if includes and excludes: b.ignored = lambda x: not includef(x) or excludef(x) elif includes: b.ignored = lambda x: not includef(x) elif excludes: b.ignored = lambda x: excludef(x) else: b.ignored = lambda x: False # Assert that there are no misnamed keys in the dictionary. if bd: error(_("@info", "Unknown keys in specification of branch '%(branch)s': " "%(keylist)s.", branch=b.id, keylist=format_item_list(bd.keys()))) p.branches = branches # Assert that all necessary fields in branch specifications exist. p.branch_ids = [] for branch in p.branches: if branch.id is None: error(_("@info", "Branch with undefined ID.")) if branch.id in p.branch_ids: error(_("@info", "Non-unique branch ID '%(branch)s'.", branch=branch.id)) p.branch_ids.append(branch.id) if branch.topdir is None: error(_("@info", "Top directory not set for branch '%(branch)s'.", branch=branch.id)) # Dictionary of branches by branch id. p.bdict = dict([(x.id, x) for x in p.branches]) # Create version control operators if given. p.summit_vcs = None p.branches_vcs = None if p.summit_version_control: p.summit_vcs = make_vcs(p.summit_version_control.lower()) if p.branches_version_control: p.branches_vcs = make_vcs(p.branches_version_control.lower()) if p.version_control: if p.summit_vcs is None: p.summit_vcs = make_vcs(p.version_control.lower()) if p.branches_vcs is None: p.branches_vcs = make_vcs(p.version_control.lower()) # Decide wrapping policies. 
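# ----------------------------------------------------------------------
# Editor's note (sketch): include-exclude specification of a branch as
# chained above; strings are compiled as regular expressions matched
# against catalog paths, callables are used as-is (patterns
# illustrative):
#
#   dict(id="trunk", topdir="trunk/po",
#        includes=[r"/kde[a-z]*/", lambda path: "sdk" in path],
#        excludes=[r"/x-test/"]),
# ----------------------------------------------------------------------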
class D: pass dummyopt = D() dummyopt.do_wrap = p.summit_wrap dummyopt.do_fine_wrap = p.summit_fine_wrap p.summit_wrapping = select_field_wrapping(cmlopt=dummyopt) dummyopt.do_wrap = p.branches_wrap dummyopt.do_fine_wrap = p.branches_fine_wrap p.branches_wrapping = select_field_wrapping(cmlopt=dummyopt) # Decide the extension of catalogs. if p.over_templates and p.lang == p.templates_lang: catext = ".pot" else: catext = ".po" # Collect catalogs from branches. p.catalogs = {} for b in p.branches: p.catalogs[b.id] = collect_catalogs(b.topdir, catext, b.by_lang, b.ignored, b.split_path, project, options) # ...and from the summit. p.catalogs[SUMMIT_ID] = collect_catalogs(p.summit.topdir, catext, None, None, None, project, options) if ( p.lang == p.templates_lang and "gather" in p.opmodes and nwgrefpath is not None ): # Also add summit templates which do not actually exist, # but are going to be created on gather without warnings, # by reflecting the catalogs found in the given path. refcats = collect_catalogs(nwgrefpath, ".po", None, None, None, project, options) for name, spec in refcats.iteritems(): if name not in p.catalogs[SUMMIT_ID]: path, subdir = spec[0] # all summit catalogs unique tpath = join_ncwd(p.summit.topdir, subdir, name + ".pot") p.catalogs[SUMMIT_ID][name] = [(tpath, subdir)] # Resolve ascription filter. project.ascription_filter = None for afname, afspec in project.ascription_filters: if options.asc_filter is None or afname == options.asc_filter: if isinstance(afspec, basestring): afcall = make_ascription_selector([afspec]) elif isinstance(afspec, (tuple, list)): afcall = make_ascription_selector(afspec) elif callable(afspec): afcall = afspec else: error(_("@info", "Unknown type of definition for " "ascription filter '%(filt)s'.", filt=afname)) project.ascription_filter = afcall break if options.asc_filter is not None and project.ascription_filter is None: error(_("@info", "Summit configuration does not define " "ascription filter '%(filt)s'.", filt=options.asc_filter)) # Link summit and ascription catalogs. if project.ascription_filter: tmp0 = [(x, y[0][0]) for x, y in p.catalogs[SUMMIT_ID].items()] tmp1 = [x[0] for x in tmp0] tmp2 = collect_ascription_associations([x[1] for x in tmp0]) tmp3 = zip([tmp2[0][0]] * len(tmp1), [x[1] for x in tmp2[0][1]]) p.aconfs_acatpaths = dict(zip(tmp1, tmp3)) # Assure that summit catalogs are unique. for name, spec in p.catalogs[SUMMIT_ID].items(): if len(spec) > 1: fstr = "\n".join([x[0] for x in spec]) error(_("@info", "Non-unique summit catalog '%(name)s', found as:\n" "%(filelist)s", name=name, filelist=fstr)) # At scatter in summit-over-static-templates mode, add to the collection # of branch catalogs any that should be newly created. p.add_on_scatter = {} if ( p.over_templates and p.lang != p.templates_lang and "scatter" in p.opmodes): # Go through all mappings and collect branch names mapped to # summit catalogs per branch id and summit name, and vice versa. 
mapped_summit_names = {} mapped_branch_names = {} for mapping in p.mappings: branch_id = mapping[0] branch_name = mapping[1] summit_names = mapping[2:] if not branch_id in mapped_summit_names: mapped_summit_names[branch_id] = {} if not branch_id in mapped_branch_names: mapped_branch_names[branch_id] = {} for summit_name in summit_names: if not summit_name in mapped_summit_names[branch_id]: mapped_summit_names[branch_id][summit_name] = [] mapped_summit_names[branch_id][summit_name].append(branch_name) if not branch_name in mapped_branch_names[branch_id]: mapped_branch_names[branch_id][branch_name] = [] mapped_branch_names[branch_id][branch_name].append(summit_name) # Go through all branches. bt_cache = {} for branch in p.branches: # Skip this branch if no templates. if not branch.topdir_templates: continue # Collect all templates for this branch. branch_templates = bt_cache.get(branch.topdir_templates) if branch_templates is None: branch_templates = collect_catalogs(branch.topdir_templates, ".pot", branch.by_lang, branch.ignored, branch.split_path, project, options) bt_cache[branch.topdir_templates] = branch_templates # Go through all summit catalogs. for summit_name in p.catalogs[SUMMIT_ID]: # Collect names of any catalogs in this branch mapped to # the current summit catalog. branch_names = [] if ( branch.id in mapped_summit_names and summit_name in mapped_summit_names[branch.id]): branch_names = mapped_summit_names[branch.id][summit_name] # Unconditionally add summit name as one possible branch name, # since otherwise a mapped branch catalog could shadow # a direct branch catalog. branch_names.append(summit_name) # For each collected branch name, check if there are some # branch templates for which the corresponding branch path # does not exist and (in case of explicit mapping) whether # all summit catalogs needed for scattering are available. # If this is the case, set missing paths for scattering. for branch_name in branch_names: if ( branch_name in branch_templates and all(map(lambda x: x in p.catalogs[SUMMIT_ID], mapped_branch_names.get(branch.id, {}) .get(branch_name, []))) ): # Assemble all branch catalog entries. for template in branch_templates[branch_name]: # Compose the branch catalog subdir and path. subdir = template[1] if branch.join_path: subpath = branch.join_path(branch_name, subdir, branch.by_lang) elif branch.by_lang: subpath = os.path.join(subdir, branch_name, branch.by_lang + ".po") else: subpath = os.path.join(subdir, branch_name + ".po") path = join_ncwd(branch.topdir, subpath) # Skip this catalog if excluded from creation on # scatter, by filter on catalog name and subdir # (False -> excluded). scf = branch.scatter_create_filter if scf and not scf(branch_name, subdir): continue # If not there already, add this path # to branch catalog entry, # and record later initialization from template. brcats = p.catalogs[branch.id].get(branch_name) if brcats is None: brcats = [] p.catalogs[branch.id][branch_name] = brcats if (path, subdir) not in brcats: brcats.append((path, subdir)) p.add_on_scatter[path] = template[0] # In summit-over-dynamic-templates mode, # automatic vivification of summit catalogs must be active. if p.templates_dynamic: p.vivify_on_merge = True # At merge in summit-over-templates mode, # if automatic vivification of summit catalogs requested, # add to the collection of summit catalogs any that should be created.
p.add_on_merge = {} if ( p.over_templates and p.lang != p.templates_lang and "merge" in p.opmodes and (p.vivify_on_merge or options.create) ): # Collect all summit templates. if not p.templates_dynamic: summit_templates = collect_catalogs(p.summit.topdir_templates, ".pot", None, None, None, project, options) else: summit_templates = p.tproject.catalogs[SUMMIT_ID] # Go through all summit templates, recording missing summit catalogs. for name, spec in summit_templates.iteritems(): tpath, tsubdir = spec[0] # all summit catalogs unique if name not in p.catalogs[SUMMIT_ID]: # Compose the summit catalog path. spath = join_ncwd(p.summit.topdir, tsubdir, name + ".po") # Add this file to summit catalog entries. p.catalogs[SUMMIT_ID][name] = [(spath, tsubdir)] # Record later initialization from template. p.add_on_merge[spath] = tpath # Convenient dictionary views of mappings. # - direct: branch_id->branch_name->summit_name # - part inverse: branch_id->summit_name->branch_name # - full inverse: summit_name->branch_id->branch_name p.direct_map = {} p.part_inverse_map = {} p.full_inverse_map = {} # Initialize mappings by branch before the main loop for direct mappings, # because an explicit mapping may name a branch before it was processed # in the main loop. for branch_id in p.branch_ids: p.direct_map[branch_id] = {} for branch_name in p.catalogs[branch_id]: p.direct_map[branch_id][branch_name] = [] # Add direct mappings. # - explicit for mapping in p.mappings: branch_id, branch_name = mapping[:2] if ( "gather" in p.opmodes and ( branch_id not in p.catalogs or branch_name not in p.catalogs[branch_id]) ): warning(_("@info", "No branch catalog corresponding to mapping %(mapping)s " "set by the summit configuration.", mapping=("('%s', '%s', ...)" % (branch_id, branch_name)))) continue summit_names = mapping[2:] p.direct_map[branch_id][branch_name] = summit_names # - implicit for branch_id in p.branch_ids: for branch_name in p.catalogs[branch_id]: if p.direct_map[branch_id][branch_name] == []: p.direct_map[branch_id][branch_name].append(branch_name) # Convert subdir mappings into dictionary by branch ID and subdir. p.subdir_map = {} for bid, bsubdir, ssubdir in p.subdir_mappings: p.subdir_map[(bid, bsubdir)] = ssubdir # Collect missing summit catalogs. needed_additions = [] for branch_id in p.branch_ids: for branch_name in p.catalogs[branch_id]: summit_names = p.direct_map[branch_id][branch_name] for summit_name in summit_names: if summit_name not in p.catalogs[SUMMIT_ID]: # Compose the path for the missing summit catalog. # Default the subdir to that of the current branch, # as it is the primary branch for this catalog. # Or use explicit subdir mapping if given. branch_path, branch_subdir = \ p.catalogs[branch_id][branch_name][0] dmkey = (branch_id, branch_subdir) summit_subdir = p.subdir_map.get(dmkey) or branch_subdir summit_path = join_ncwd(p.summit.topdir, summit_subdir, summit_name + catext) if "gather" in p.opmodes: if options.create: # Add summit catalog into list of existing catalogs; # it will be created for real on gather. p.catalogs[SUMMIT_ID][summit_name] = [ (summit_path, summit_subdir)] else: needed_additions.append((branch_path, summit_path)) elif "scatter" in p.opmodes: needed_additions.append((branch_path, summit_path)) # Initialize inverse mappings. 
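# ----------------------------------------------------------------------
# Editor's note (sketch): the mapping specifications consumed above,
# with illustrative names. A mapping is (branch id, branch catalog
# name, summit catalog name...); several summit names make a split
# mapping. A subdir mapping is (branch id, branch subdir, summit
# subdir):
#
#   S.mappings = [
#       ("stable", "kdelibs4", "kdelibs"),
#       ("trunk", "desktop_kdebase",
#        "desktop_kdebase-runtime", "desktop_kdebase-workspace"),
#   ]
#   S.subdir_mappings = [
#       ("trunk", "extragear-utils", "extragear"),
#   ]
# ----------------------------------------------------------------------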
# - part inverse: for branch_id in p.branch_ids: p.part_inverse_map[branch_id] = {} for summit_name in p.catalogs[SUMMIT_ID]: p.part_inverse_map[branch_id][summit_name] = [] # - full inverse: for summit_name in p.catalogs[SUMMIT_ID]: p.full_inverse_map[summit_name] = {} for branch_id in p.branch_ids: p.full_inverse_map[summit_name][branch_id] = [] # Add existing inverse mappings. for branch_id in p.branch_ids: for branch_name in sorted(p.catalogs[branch_id]): for summit_name in p.direct_map[branch_id][branch_name]: if summit_name in p.full_inverse_map: # - part inverse: pinv = p.part_inverse_map[branch_id][summit_name] if branch_name not in pinv: pinv.append(branch_name) # - full inverse: finv = p.full_inverse_map[summit_name][branch_id] if branch_name not in finv: finv.append(branch_name) # Collect superfluous summit catalogs. needed_removals = [] for summit_name in p.catalogs[SUMMIT_ID]: src_branch_ids = [] for branch_id in project.branch_ids: if project.full_inverse_map[summit_name][branch_id]: src_branch_ids.append(branch_id) if not src_branch_ids: if "gather" in p.opmodes: if not options.create: summit_path = p.catalogs[SUMMIT_ID][summit_name][0][0] needed_removals.append(summit_path) # Create function to assign precedence to a subdirectory. p.subdir_precedence = [os.path.normpath(sd) for sd in p.subdir_precedence] def calc_subdir_precedence (subdir): for i, test_subdir in enumerate(p.subdir_precedence): ltsd = len(test_subdir) if ( subdir.startswith(test_subdir) and subdir[ltsd:ltsd + 1] in ("", os.path.sep) ): return i return len(p.subdir_precedence) p.calc_subdir_precedence = calc_subdir_precedence # Collect summit catalogs that should be moved. needed_moves = [] for summit_name in p.catalogs[SUMMIT_ID]: branch_subdirs = [] for branch_id in p.full_inverse_map[summit_name]: for branch_name in p.full_inverse_map[summit_name][branch_id]: branch_subdirs_1 = [] for bpath, bsubdir in p.catalogs[branch_id][branch_name]: dmkey = (branch_id, bsubdir) branch_subdirs_1.append(p.subdir_map.get(dmkey) or bsubdir) branch_subdirs.extend(branch_subdirs_1) if branch_subdirs: branch_subdirs = list(set(branch_subdirs)) subdir_precs = map(p.calc_subdir_precedence, branch_subdirs) precs_subdirs = sorted(zip(subdir_precs, branch_subdirs)) branch_subdirs_sel = [sd for pr, sd in precs_subdirs if pr == precs_subdirs[0][0]] summit_subdir = p.catalogs[SUMMIT_ID][summit_name][0][1] if summit_subdir not in branch_subdirs_sel: summit_path = p.catalogs[SUMMIT_ID][summit_name][0][0] dpaths = [] for bsubdir in branch_subdirs_sel: dpath = join_ncwd(p.summit.topdir, bsubdir, summit_name + catext) dpaths.append(dpath) if "gather" in p.opmodes: if not options.create: needed_moves.append((summit_path, dpaths)) # If catalog creation is not allowed, # complain about needed additions, removals, and moves. 
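# ----------------------------------------------------------------------
# Editor's note (sketch): with an illustrative
#
#   S.subdir_precedence = ["kde-core", "extragear"]
#
# calc_subdir_precedence() above assigns index 0 to "kde-core" and its
# subdirectories, 1 to "extragear/graphics" and the like, and 2 (the
# list length) to any other subdir, so unlisted locations sort last
# when choosing where a summit catalog should live.
# ----------------------------------------------------------------------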
if needed_additions or needed_removals or needed_moves: if needed_additions: fmtlist = "\n".join("%s --> %s" % x for x in sorted(needed_additions)) warning(_("@info", "Some branch catalogs have no " "associated summit catalog " "(expected summit path given):\n" "%(filelist)s", filelist=fmtlist)) if needed_removals: fmtlist = "\n".join(sorted(needed_removals)) warning(_("@info", "Some summit catalogs have no " "associated branch catalogs:\n" "%(filelist)s", filelist=fmtlist)) if needed_moves: fmtlist = "\n".join("%s --| %s" % (x, " | ".join(y)) for x, y in sorted(needed_moves)) warning(_("@info", "Some summit catalogs should be " "moved to another subdirectory:\n" "%(filelist)s", filelist=fmtlist)) if "gather" in p.opmodes: error(_("@info", "Halting because catalog creation is not allowed " "(consider issuing %(opt)s option).", opt="--create")) # Fill in defaults for missing fields in hook specs. for attr in p.__dict__: if attr.startswith("hook_"): p.__dict__[attr] = hook_fill_defaults(p.__dict__[attr]) return p def split_path_in_project (project, path): if os.path.isfile(path): if not path.endswith((".po", ".pot")): error(_("@info", "Non-PO file '%(file)s' given as catalog.", file=path)) splits = [] for b in [project.summit] + project.branches: broot = os.path.abspath(b.topdir) apath = os.path.abspath(path) if apath.startswith(broot + os.path.sep) or apath == broot: subpath = apath[len(broot + os.path.sep):] # Split the path into catalog name and subdirectory. if os.path.isfile(apath): if b.split_path: catname, subdir = b.split_path(subpath) else: subdir = os.path.dirname(subpath) basename = os.path.basename(subpath) catname = basename[:basename.rfind(".")] if b.by_lang: # If this is by-language mode, # catalog path can be split only if of proper language, # and subdirectory and catalog name should backtrack. if catname != b.by_lang: continue catname = os.path.basename(subdir) subdir = os.path.dirname(subdir) elif os.path.isdir(apath): if b.split_path: catname = None dummy_subpath = os.path.join(subpath, "__dummy__.po") subdir = b.split_path(dummy_subpath)[1] else: subdir = subpath catname = None if b.by_lang: # If this is a leaf directory in by-language mode, # then actually a catalog has been selected, # and subdirectory and catalog name should backtrack. apath2 = os.path.join(subdir, b.by_lang + ".po") if os.path.isfile(apath2): catname = os.path.basename(subdir) subdir = os.path.dirname(subdir) # Collect the splitting. # Catalog name being None means that a subdirectory is selected, # and if subdirectory too is None, the whole branch is selected. if not catname and not subdir: subdir = None splits.append((b.id, subdir, catname)) if not splits: error(_("@info", "Path '%(path)s' is not covered by the summit configuration.", path=path)) return splits def collect_partspecs (project, specargs): partbids = [] partspecs = {} for specarg in specargs: # If the partial specification is a valid path, # convert it to operation target. 
optargets = [] if os.path.exists(specarg): splits = split_path_in_project(project, specarg) for bid, breldir, catname in splits: if catname: optarget = bid + ":" + catname elif breldir: optarget = bid + ":" + breldir + os.path.sep else: optarget = bid + ":" optargets.append(optarget) else: optargets = [specarg] for optarget in optargets: lst = optarget.split(":", 1) if len(lst) < 2: fdname, = lst bid = None else: bid, fdname = lst if bid not in project.branch_ids and bid != SUMMIT_ID: error(_("@info", "Branch '%(branch)s' is not defined " "in the summit configuration.", branch=bid)) if bid and bid not in partbids: partbids.append(bid) if fdname: bsid = bid or SUMMIT_ID if bsid not in partspecs: partspecs[bsid] = [] partspecs[bsid].append(fdname) return partspecs, partbids # Fill in defaults for missing fields in hook specs. def hook_fill_defaults (specs): new_specs = [] for spec in specs: call = spec[0] branch_rx = r"" if len(spec) > 1: branch_rx = spec[1] name_rx = r"" if len(spec) > 2: name_rx = spec[2] new_specs.append((call, branch_rx, name_rx)) return new_specs # Each catalog is represented by a dictionary entry: the key is the catalog # name, the value is the list of tuples of file path and subdirectory # relative to top (list in case there are several same-named catalogs in # different subdirectories). def collect_catalogs (topdir, catext, by_lang, ignored, split_path, project, options): catalogs = {} topdir = os.path.normpath(topdir) for root, dirs, files in os.walk(topdir): for file in files: catn = "" if file.endswith(catext): if not by_lang: fpath = os.path.abspath(os.path.join(root, file)) if split_path: catn, spath = split_path(fpath[len(topdir) + 1:]) else: catn = file[0:file.rfind(".")] spath = root[len(topdir) + 1:] elif file == by_lang + ".po" or catext == ".pot": fpath = os.path.abspath(os.path.join(root, file)) if split_path: catn, spath = split_path(fpath[len(topdir) + 1:]) else: catn = os.path.basename(root) spath = os.path.dirname(root)[len(topdir) + 1:] if catn: if not ignored or not ignored(fpath): if catn not in catalogs: catalogs[catn] = [] fpath = join_ncwd(fpath) spath = os.path.normpath(spath) catalogs[catn].append((fpath, spath)) for catpaths in catalogs.values(): catpaths.sort(key=lambda x: x[0]) return catalogs def summit_gather (project, options): if ( project.over_templates and project.lang != project.templates_lang and not options.force): error(_("@info", "Gathering catalogs is normally not allowed " "in summit-over-static-templates mode. " "If this is the initial creation of summit catalogs, " "or externally injected branch catalogs need to be gathered, " "run with options %(opts)s.", opts="--create --force")) elif ( project.templates_dynamic and project.lang == project.templates_lang and not options.force): warning(_("@info", "Gathering templates is superfluous in " "summit-over-templates mode. " "If this is done to check whether gathering works, " "to suppress this message run with option %(opt)s.", opt="--force")) # Collect names of summit catalogs to gather. summit_names = select_summit_names(project, options) # Setup progress indicator. upprog = lambda x=None: x if not options.verbose: catpaths = [project.catalogs[SUMMIT_ID][x][0][0] for x in summit_names] upprog = init_file_progress(catpaths, addfmt=t_("@info:progress", "Gathering: %(file)s")) # Gather all selected catalogs.
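# ----------------------------------------------------------------------
# Editor's note (sketch): the PARTIAL arguments resolved by
# collect_partspecs() above, with illustrative names. A filesystem path
# inside the summit or a branch is converted to an operation target of
# the form BRANCH:NAME, BRANCH:SUBDIR/ or BRANCH:, where the summit
# itself has the reserved id "+":
#
#   posummit.py merge +:kdelibs              # one summit catalog
#   posummit.py scatter trunk:kdelibs        # one branch catalog
#   posummit.py gather trunk:multimedia/     # whole subdir of a branch
#   posummit.py merge summit/po/kdelibs.po   # same, given as a path
# ----------------------------------------------------------------------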
for name in summit_names: catpath = project.catalogs[SUMMIT_ID][name][0][0] if options.verbose: report(_("@info:progress", "Gathering %(file)s...", file=catpath)) upprogc = lambda: upprog(catpath) summit_gather_single(name, project, options, update_progress=upprogc) upprog() def summit_scatter (project, options): if project.over_templates and project.lang == project.templates_lang: error(_("@info", "Scattering not possible on '%(lang)s' " "in summit-over-templates mode.", lang=project.templates_lang)) scatter_specs = [] # Select branches to scatter to. if not options.partbids or SUMMIT_ID in options.partbids: branch_ids = project.branch_ids else: branch_ids = options.partbids # Collect catalogs to scatter through all selected branches. for branch_id in branch_ids: branch_catalogs = select_branch_catalogs(branch_id, project, options) for branch_name, branch_path, branch_subdir in branch_catalogs: # Collect names of all the summit catalogs which this branch # catalog supplies messages to. summit_names = project.direct_map[branch_id][branch_name] # Collect paths of selected summit catalogs. summit_paths = [] for summit_name in summit_names: if not summit_name in project.catalogs[SUMMIT_ID]: # Warning pertinent to this situation will have # been issued earlier, so just skip it here. #warning(_("@info", #"Missing summit catalog " #"for branch catalog '%(file)s'.", #file=branch_path)) continue summit_paths.append( project.catalogs[SUMMIT_ID][summit_name][0][0]) # There may be no summit catalogs for this branch catalog. # The warning about this condition has been issued earlier, # just skip the branch catalog here. if summit_paths: scatter_specs.append((branch_id, branch_name, branch_subdir, branch_path, summit_paths)) # Dummy entry to indicate branch switch. scatter_specs.append((branch_id, None, None, None, None)) # Setup progress indicator. upprog = lambda x=None: x if not options.verbose: catpaths = [x[3] for x in scatter_specs if x[1]] upprog = init_file_progress(catpaths, addfmt=t_("@info:progress", "Scattering: %(file)s")) # Scatter to branch catalogs. for scatter_spec in scatter_specs: branch_id, catpath = scatter_spec[0], scatter_spec[3] if catpath is not None: if options.verbose: report(_("@info:progress", "Scattering %(file)s...", file=catpath)) upprogc = lambda: upprog(catpath) summit_scatter_single(*(scatter_spec + (project, options, upprogc))) else: # Apply post-scatter hooks. if options.verbose: report(_("@info:progress", "Applying post-hook to branch %(branch)s...", branch=branch_id)) exec_hook_branch(branch_id, project.hook_on_scatter_branch) upprog() def summit_merge (project, options): if project.over_templates and project.lang == project.templates_lang: error(_("@info", "Merging not possible on '%(lang)s' in " "summit-over-templates mode.", lang=project.templates_lang)) merge_specs = [] # Select branches to merge. if not options.partbids: branch_ids = project.branch_ids + [SUMMIT_ID] else: branch_ids = options.partbids # Setup merging in summit. if SUMMIT_ID in branch_ids and project.summit.topdir_templates: branch_ids.remove(SUMMIT_ID) # Collect names of summit catalogs to merge. summit_names = select_summit_names(project, options) # Collect template catalogs to use. if not project.templates_dynamic: template_catalogs = collect_catalogs(project.summit.topdir_templates, ".pot", None, None, None, project, options) else: template_catalogs = project.tproject.catalogs[SUMMIT_ID] # Collect data for summit catalogs to merge. 
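# ----------------------------------------------------------------------
# Editor's note (sketch): hook specifications follow the
# (call, branch_regex, name_regex) form filled in by
# hook_fill_defaults() above; shorter tuples default the regexes to
# match everything. The hook function here is an assumption of a
# typical pology hook, not prescribed by this script:
#
#   from pology.remove import remove_accel_msg
#   S.hook_on_gather_msg_branch = [
#       (remove_accel_msg, r"^trunk$"),
#   ]
# ----------------------------------------------------------------------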
for name in summit_names: summit_path, summit_subdir = project.catalogs[SUMMIT_ID][name][0] if name not in template_catalogs: warning(_("@info", "No template for summit catalog '%(file)s'.", file=summit_path)) continue template_path = template_catalogs[name][0][0] merge_specs.append((SUMMIT_ID, name, summit_subdir, summit_path, template_path, project.summit_wrapping, project.summit_fuzzy_merging)) # Setup merging in branches. for branch_id in branch_ids: branch = project.bdict[branch_id] # Skip branch if local merging not desired, or no templates defined. if (not branch.merge or branch.topdir_templates is None): continue # Collect branch catalogs to merge. branch_catalogs = select_branch_catalogs(branch_id, project, options) # Collect template catalogs to use. template_catalogs = collect_catalogs(branch.topdir_templates, ".pot", branch.by_lang, branch.ignored, branch.split_path, project, options) # Collect data for branch catalogs to merge. for name, branch_path, branch_subdir in branch_catalogs: if not os.path.isfile(branch_path): # Catalog has been selected due to another operation mode, # which can create catalogs from scratch. continue if not name in template_catalogs: warning(_("@info", "No template for branch catalog '%(file)s'.", file=branch_path)) continue exact = False for template_path, template_subdir in template_catalogs[name]: if template_subdir == branch_subdir: exact = True break if not exact: warning(_("@info", "No exact template for branch catalog '%(file)s'.", file=branch_path)) continue merge_specs.append((branch_id, name, branch_subdir, branch_path, template_path, project.branches_wrapping, project.branches_fuzzy_merging)) # Setup progress indicator. upprog = lambda x=None: x if not options.verbose: catpaths = [x[3] for x in merge_specs] upprog = init_file_progress(catpaths, addfmt=t_("@info:progress", "Merging: %(file)s")) # Merge catalogs. for merge_spec in merge_specs: catpath = merge_spec[3] if options.verbose: report(_("@info:progress", "Merging %(file)s...", file=catpath)) upprogc = lambda: upprog(catpath) summit_merge_single(*(merge_spec + (project, options, upprogc))) upprog() # Remove template tree in summit-over-dynamic-templates mode. if project.templates_dynamic: shutil.rmtree(project.tproject.summit.topdir) def summit_deps (project, options): # Collect names of summit catalogs for which to report dependencies. summit_names = select_summit_names(project, options) # Report dependencies for all selected catalogs. for summit_name in summit_names: if summit_name not in project.catalogs[SUMMIT_ID]: # May happen if there are some missing summit catalogs # to current branch catalogs, i.e. gather has not been made. continue summit_path = project.catalogs[SUMMIT_ID][summit_name][0][0] branch_paths = [] for branch_id in project.branch_ids: for branch_name in project.full_inverse_map[summit_name][branch_id]: for branch_path, d1 in project.catalogs[branch_id][branch_name]: branch_paths.append(branch_path) fmtbpaths = " ".join(branch_paths) if options.verbose: actype = _("@item:intext action performed on a catalog", "depends") report(": (%s) %s %s" % (actype, summit_path, fmtbpaths)) else: report(": %s %s" % (summit_path, fmtbpaths)) def select_branch_catalogs (branch_id, project, options): # Shortcuts. pbcats = project.catalogs[branch_id] # Select either all catalogs in this branch, # or those mentioned in the command line. 
if not options.partspecs: branch_catalogs = [] for name, spec in pbcats.items(): for path, subdir in spec: if options.selcatf(path): branch_catalogs.append((name, path, subdir)) else: # Select branch catalogs by command line specification. branch_catalogs = [] # Process direct specifications (branch->summit). if branch_id in options.partspecs: for part_spec in options.partspecs[branch_id]: # If the catalog specification has path separators, # then it selects a complete subdir in the branch. branch_catalogs_l = [] if part_spec.find(os.sep) >= 0: sel_subdir = os.path.normpath(part_spec) one_found = False for name, spec in pbcats.items(): for path, subdir in spec: if sel_subdir == subdir: one_found = True if options.selcatf(path): branch_catalogs_l.append( (name, path, subdir)) if not one_found: error(_("@info", "No catalogs in subdirectory '%(dir)s' " "of branch '%(branch)s'.", dir=sel_subdir, branch=branch_id)) else: # Otherwise, specific catalog is selected. sel_name = part_spec one_found = False for name, spec in pbcats.items(): if sel_name == name: for path, subdir in spec: one_found = True if options.selcatf(path): branch_catalogs_l.append( (name, path, subdir)) break if not one_found: error(_("@info", "No catalog named '%(name)s' " "in branch '%(branch)s'.", name=sel_name, branch=branch_id)) # Also select all branch catalogs which contribute to same # summit catalogs as the already selected ones. branch_catalogs_l2 = [] dmap = project.direct_map[branch_id] pimap = project.part_inverse_map[branch_id] for branch_name, d1, d2 in branch_catalogs_l: if branch_name in dmap: for summit_name in dmap[branch_name]: if summit_name in pimap: for name in pimap[summit_name]: for path, subdir in pbcats[name]: if options.selcatf(path): branch_catalogs_l2.append( (name, path, subdir)) branch_catalogs.extend(branch_catalogs_l) branch_catalogs.extend(branch_catalogs_l2) # Process inverse specifications (summit->branch). if SUMMIT_ID in options.partspecs: for part_spec in options.partspecs[SUMMIT_ID]: if part_spec.find(os.sep) >= 0: # Complete subdir. sel_subdir = os.path.normpath(part_spec) cats = [] for name, spec in project.catalogs[SUMMIT_ID].items(): path, subdir = spec[0] # all summit catalogs unique if sel_subdir == subdir: bnames = project.full_inverse_map[name][branch_id] for bname in bnames: if bname in pbcats: for bpath, bsubdir in pbcats[bname]: if options.selcatf(bpath): cats.append((bname, bpath, bsubdir)) branch_catalogs.extend(cats) else: # Specific catalog. sel_name = part_spec if not sel_name in project.catalogs[SUMMIT_ID]: error(_("@info", "No summit catalog named '%(name)s'.", name=sel_name)) bnames = project.full_inverse_map[sel_name][branch_id] for bname in bnames: if bname in pbcats: for bpath, bsubdir in pbcats[bname]: if options.selcatf(bpath): branch_catalogs.append( (bname, bpath, bsubdir)) # Same catalogs may have been selected multiple times, remove. branch_catalogs = list(set(branch_catalogs)) # Sort by path. branch_catalogs.sort(key=lambda x: x[1]) # ...sorting is not only for looks, but to establish priority of # supplying comments to summit messages. return branch_catalogs def select_summit_names (project, options): # Collect all summit catalogs selected explicitly or implicitly. 
summit_names = [] if options.partspecs is None: for name, spec in project.catalogs[SUMMIT_ID].items(): path, subdir = spec[0] # summit catalogs are unique if options.selcatf(path): summit_names.append(name) else: for branch_id in options.partspecs: for part_spec in options.partspecs[branch_id]: if branch_id == SUMMIT_ID: # explicit by summit reference if part_spec.find(os.sep) >= 0: # whole subdir sel_subdir = os.path.normpath(part_spec) one_found = False for name, spec in project.catalogs[SUMMIT_ID].items(): path, subdir = spec[0] # summit catalogs are unique if sel_subdir == subdir: one_found = True if options.selcatf(path): summit_names.append(name) if not one_found: error(_("@info", "No summit directory named '%(name)s'.", name=sel_subdir)) else: # single name sel_name = part_spec spec = project.catalogs[SUMMIT_ID].get(sel_name) if not spec: error(_("@info", "No summit catalog named '%(name)s'.", name=sel_name)) path, subdir = spec[0] # summit catalogs are unique if options.selcatf(path): summit_names.append(sel_name) else: # implicit by branch reference if part_spec.find(os.sep) >= 0: # whole subdir sel_subdir = os.path.normpath(part_spec) one_found = False for name, spec in project.catalogs[branch_id].items(): for path, subdir in spec: if sel_subdir == subdir: one_found = True if options.selcatf(path): summit_names.extend( project.direct_map[branch_id][name]) break if not one_found: error(_("@info", "No directory named '%(name)s' " "in branch '%(branch)s'.", name=sel_subdir, branch=branch_id)) else: # single name sel_name = part_spec spec = project.catalogs[branch_id].get(sel_name) if not spec: error(_("@info", "No catalog named '%(name)s' " "in branch '%(branch)s'.", name=sel_name, branch=branch_id)) for path, subdir in spec: if options.selcatf(path): summit_names.extend( project.direct_map[branch_id][sel_name]) break # Make names unique and sort by path. summit_names = list(set(summit_names)) summit_names.sort(key=lambda x: project.catalogs[SUMMIT_ID].get(x, [[""]])[0][0]) # Additionally sort by subdirectory precedence. # This is necessary so that catalogs can be properly moved when gathering, # in case a higher precedence subdirectory was not created before. # Default "~" means that catalogs with no paths will be sorted at end. summit_names.sort(key=lambda x: project.calc_subdir_precedence( project.catalogs[SUMMIT_ID].get(x, [["", "~"]])[0][1])) return summit_names def summit_gather_single (summit_name, project, options, phony=False, pre_summit_names=(), memo_store=None, update_progress=(lambda: None)): if memo_store is not None: memo_key = (summit_name, tuple(sorted(pre_summit_names))) if memo_key in memo_store: # value can be None return memo_store.get(memo_key) update_progress() summit_path = project.catalogs[SUMMIT_ID][summit_name][0][0] summit_subdir = project.catalogs[SUMMIT_ID][summit_name][0][1] update_from_old = ( os.path.exists(summit_path) and not project.templates_dynamic) # Do not overwrite the old summit catalog here if it exists, # as it will be needed for comparison later. monitored = update_from_old summit_cat = Catalog("", monitored=monitored, wrapping=project.summit_wrapping, create=True) summit_cat.filename = summit_path # Collect branches in which this summit catalog has corresponding # branch catalogs, in order of branch priority. src_branch_ids = [] for branch_id in project.branch_ids: if project.full_inverse_map[summit_name][branch_id]: src_branch_ids.append(branch_id) # If there are no branch catalogs, # then the current summit catalog is to be removed.
if not src_branch_ids: if phony: # cannot happen error(_("@info", "Phony gather on summit catalog which is to be removed.")) # Remove by version control, if any. if project.summit_vcs: if not project.summit_vcs.remove(summit_path): warning(_("@info", "Cannot remove '%(path)s' from version control.", path=summit_path)) # If not removed by version control, plainly delete. if os.path.isfile(summit_path): os.unlink(summit_path) if os.path.isfile(summit_path): warning(_("@info", "Cannot remove '%(path)s' from disk.", path=summit_path)) if not os.path.isfile(summit_path): if options.verbose: actype = _("@item:intext action performed on a catalog", "gathered-removed") report("- (%s) %s" % (actype, summit_path)) elif not options.quiet: report("- %s" % summit_path) # Skip the rest, nothing to gather. if memo_store is not None: memo_store[memo_key] = summit_cat return summit_cat # Open all corresponding branch catalogs. # For each branch catalog, also phony-gather any dependent summit # catalogs. Phony means not to take into account branch catalogs which # map to current summit catalog if it is higher in their queue than # the phony-gathered one, and not to sync phony-gathered catalog; # this is needed in order that any new messages get inserted # uniquely and deterministically in case of split-mappings. bcat_pscats = {} if phony or memo_store is not None: sub_memo_store = memo_store else: sub_memo_store = {} for branch_id in src_branch_ids: branch = project.bdict[branch_id] if isinstance(branch.insert_nosim, (list, tuple)): apply_insert_nosim = lambda sn, sd: ( any(re.search(rs, sn) for rs in branch.insert_nosim)) elif callable(branch.insert_nosim): apply_insert_nosim = lambda sn, sd: branch.insert_nosim(sn, sd) else: apply_insert_nosim = lambda sn, sd: bool(branch.insert_nosim) bcat_pscats[branch_id] = [] for branch_name in project.full_inverse_map[summit_name][branch_id]: # In phony-gather, do not use branch catalogs with split-mappings # which map to one of the summit catalogs among previous. phony_skip = False for dep_summit_name in project.direct_map[branch_id][branch_name]: if dep_summit_name in pre_summit_names: phony_skip = True break if phony_skip: continue # Gather and open dependent summit catalogs. dep_summit_cats = [] sub_pre_summit_names = list(pre_summit_names) for dep_summit_name in project.direct_map[branch_id][branch_name]: if dep_summit_name == summit_name: sub_pre_summit_names.append(summit_name) continue dep_summit_cat = summit_gather_single(dep_summit_name, project, options, True, sub_pre_summit_names, sub_memo_store, update_progress) if dep_summit_cat is not None: dep_summit_cats.append(dep_summit_cat) # Open all branch catalogs of this name, ordered by path, # link them to the same dependent summit catalogs. for path, subdir in project.catalogs[branch_id][branch_name]: update_progress() # Apply hooks to branch catalog file, creating temporaries. tmp_path = None if project.hook_on_gather_file_branch: # Temporary path should be such as to not modify the # catalog name (e.g. appending ".mod" could make ".po" # a part of the name). tmp_path = path + "~mod" shutil.copyfile(path, tmp_path) exec_hook_file(branch_id, branch_name, subdir, tmp_path, project.hook_on_gather_file_branch) branch_cat = Catalog(tmp_path or path, monitored=False) if tmp_path: # as soon as catalog is opened, no longer needed os.unlink(tmp_path) # Apply hooks to branch catalog. 
                if project.hook_on_gather_cat_branch:
                    exec_hook_cat(branch_id, branch_name, subdir, branch_cat,
                                  project.hook_on_gather_cat_branch)
                    branch_cat.sync_map()

                # Apply hooks to all branch catalog messages here,
                # as they may modify message keys.
                if project.hook_on_gather_msg_branch:
                    for msg in branch_cat:
                        update_progress()
                        exec_hook_msg(branch_id, branch_name, subdir,
                                      msg, branch_cat,
                                      project.hook_on_gather_msg_branch)
                    branch_cat.sync_map()

                insert_nosim = apply_insert_nosim(branch_name, subdir)

                bcat_pscats[branch_id].append((branch_cat,
                                               dep_summit_cats,
                                               insert_nosim))

    # On phony gather, in case of split mappings,
    # it may happen that there are no corresponding branch catalogs.
    if phony and not any(bcat_pscats.values()):
        if memo_store is not None:
            memo_store[memo_key] = None
        return None

    # Select primary branch catalog.
    prim_branch_cat = None
    for branch_id in src_branch_ids:
        if bcat_pscats[branch_id]:
            prim_branch_cat = bcat_pscats[branch_id][0][0]
            break
    assert prim_branch_cat is not None

    # Gather messages through branch catalogs.
    for branch_id in src_branch_ids:
        for branch_cat, dep_summit_cats, insert_nosim in bcat_pscats[branch_id]:
            is_primary = branch_cat is prim_branch_cat
            summit_gather_single_bcat(branch_id, branch_cat, is_primary,
                                      summit_cat, monitored, dep_summit_cats,
                                      insert_nosim,
                                      project, options, update_progress)

    # Gather the summit header according to primary branch.
    summit_gather_single_header(summit_cat, prim_branch_cat, project, options)

    # Apply hooks to the summit messages.
    if project.hook_on_gather_msg:
        for msg in summit_cat:
            exec_hook_msg(SUMMIT_ID, summit_cat.name, summit_subdir,
                          msg, summit_cat, project.hook_on_gather_msg)

    # Apply hooks to the summit catalog.
    exec_hook_cat(SUMMIT_ID, summit_cat.name, summit_subdir, summit_cat,
                  project.hook_on_gather_cat)

    # If phony-gather, stop here and return summit catalog for reference.
    if phony:
        if memo_store is not None:
            memo_store[memo_key] = summit_cat
        return summit_cat

    # If the old summit catalog exists, compare with the new.
    # If there were any modified entries, or their order changed,
    # replace the old with the new summit catalog.
    # Copy over unmodified entries from the old catalog,
    # to avoid line reformatting.
    if update_from_old:
        old_cat = Catalog(summit_path, monitored=monitored,
                          wrapping=project.summit_wrapping)
        summit_created = False
        replace = False
        # Compare headers without some insignificant fields.
        if cmpnorm_hdr(summit_cat.header) == cmpnorm_hdr(old_cat.header):
            summit_cat.header = old_cat.header
        else:
            replace = True
        # Compare messages and their positions.
        for pos in range(len(summit_cat)):
            update_progress()
            old_pos = old_cat.find(summit_cat[pos])
            if pos != old_pos:
                replace = True
            if old_pos >= 0:
                if summit_cat[pos] == old_cat[old_pos]:
                    summit_cat[pos] = old_cat[old_pos]
                else:
                    replace = True
        # Compare lengths.
        if len(summit_cat) != len(old_cat):
            replace = True
    else:
        summit_created = True
        replace = True

    # Check if the catalog needs to be moved to another subdirectory.
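    # For example (hypothetical subdirs): a catalog gathered from branch
    # subdirs "extragear-utils" (precedence 0) and "playground-utils"
    # (precedence 1) may only reside under "extragear-utils" in the summit,
    # and is scheduled for a move below if currently elsewhere.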
    branch_subdirs = []
    for branch_id in project.full_inverse_map[summit_name]:
        for branch_name in project.full_inverse_map[summit_name][branch_id]:
            branch_subdirs_1 = []
            for bpath, bsubdir in project.catalogs[branch_id][branch_name]:
                bsubdir = project.subdir_map.get((branch_id, bsubdir), bsubdir)
                branch_subdirs_1.append(bsubdir)
            branch_subdirs_1.sort()
            branch_subdirs.extend(branch_subdirs_1)
    new_summit_path = summit_path
    if branch_subdirs:
        branch_subdirs = list(set(branch_subdirs))
        subdir_precs = map(project.calc_subdir_precedence, branch_subdirs)
        precs_subdirs = sorted(zip(subdir_precs, branch_subdirs))
        branch_subdirs_sel = [sd for pr, sd in precs_subdirs
                              if pr == precs_subdirs[0][0]]
        if summit_subdir not in branch_subdirs_sel:
            catext = summit_path[summit_path.rfind("."):]
            new_summit_path = join_ncwd(project.summit.topdir,
                                        branch_subdirs_sel[0],
                                        summit_name + catext)

    if replace or summit_cat.filename != new_summit_path:
        added = False
        moved = False

        if replace:
            # Set template creation date for the summit catalog
            # to the current date.
            # Do not try to trust branch template creation dates,
            # e.g. by copying the latest one.
            summit_cat.header.set_field(u"POT-Creation-Date",
                                        format_datetime(),
                                        before=u"PO-Revision-Date",
                                        reorder=True)

            # Sync to disk.
            summit_cat.sync()

            # Apply hooks to summit catalog file.
            exec_hook_file(SUMMIT_ID, summit_cat.name, summit_subdir,
                           summit_cat.filename, project.hook_on_gather_file)

            if summit_created:
                added = True
            # Add to version control.
            if (    project.summit_vcs
                and not project.summit_vcs.is_versioned(summit_cat.filename)
            ):
                if not project.summit_vcs.add(summit_cat.filename):
                    warning(_("@info",
                              "Cannot add '%(file)s' to version control.",
                              file=summit_cat.filename))
                else:
                    added = True

        if summit_cat.filename != new_summit_path:
            if project.summit_vcs:
                if not project.summit_vcs.move(summit_cat.filename,
                                               new_summit_path):
                    warning(_("@info",
                              "Cannot move '%(srcfile)s' to '%(dstfile)s'.",
                              srcfile=summit_cat.filename,
                              dstfile=new_summit_path))
                else:
                    summit_cat.filename = new_summit_path
                    moved = True

        branch_paths = []
        for branch_id in src_branch_ids:
            for branch_cat, dep_summit_cats, insert_nosim in bcat_pscats[branch_id]:
                branch_paths.append(branch_cat.filename)
        paths_str = " ".join(branch_paths)
        if options.verbose:
            if added:
                actype = _("@item:intext action performed on a catalog",
                           "gathered-added")
                report(">+ (%s) %s %s" % (actype, summit_cat.filename, paths_str))
            elif moved:
                actype = _("@item:intext action performed on a catalog",
                           "gathered-moved")
                report(">| (%s) %s %s" % (actype, summit_cat.filename, paths_str))
            else:
                actype = _("@item:intext action performed on a catalog",
                           "gathered")
                report("> (%s) %s %s" % (actype, summit_cat.filename, paths_str))
        elif not options.quiet:
            if added:
                report(">+ %s %s" % (summit_cat.filename, paths_str))
            elif moved:
                report(">| %s %s" % (summit_cat.filename, paths_str))
            else:
                report("> %s %s" % (summit_cat.filename, paths_str))

    if memo_store is not None:
        memo_store[memo_key] = summit_cat
    return summit_cat


def cmpnorm_hdr (hdr):

    rhdr = Header(hdr)
    for field in (
        "POT-Creation-Date",
    ):
        rhdr.remove_field(field)
    return rhdr


def extkey_msg (msg):

    # NOTE: If computation of context pad is modified,
    # padded messages in existing summit catalogs will get fuzzy
    # on next merge with newly gathered templates.
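    # For a plural message with msgctxt "menu" (hypothetical values),
    # the extended key comes out as:
    #   msgctxt "menu|d41d8cd98f00b204e9800998ecf8427e"
    # with the pad also recorded in an auto comment:
    #   #. +: msgctxt-pad d41d8cd98f00b204e9800998ecf8427e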
    msg = MessageUnsafe(msg)
    if msg.msgid_plural is not None:
        h = hashlib.md5()
        h.update(msg.msgid_plural.encode("UTF-8"))
        ctxtpad = h.hexdigest()
    else:
        # Something that looks like a hex digest but slightly shorter,
        # so that it does not match any real digest.
        ctxtpad = "abcd1234efgh5665hgfe4321dcba"
    msg.auto_comment.append(u"%s msgctxt-pad %s"
                            % (_summit_tag_kwprop, ctxtpad))
    if msg.msgctxt is None:
        msg.msgctxt = u"%s" % ctxtpad
    else:
        msg.msgctxt = u"%s|%s" % (msg.msgctxt, ctxtpad)

    return msg


def summit_gather_single_bcat (branch_id, branch_cat, is_primary,
                               summit_cat, monitored, dep_summit_cats,
                               insert_nosim,
                               project, options, update_progress):

    MessageType = (Message if monitored else MessageUnsafe)

    # Go through messages in the branch catalog, merging them with
    # existing summit messages, or collecting for later insertion.
    # Do not insert new messages immediately, as source references may be
    # updated by merging, which reflects on heuristic insertion.
    # Ignore messages present in dependent summit catalogs.
    msgs_to_merge = []
    msgs_to_insert = []
    xkpairs = []
    for msg in branch_cat:
        update_progress()

        # Do not gather obsolete messages.
        if msg.obsolete:
            continue

        # Normalizations when gathering templates,
        # in case extraction tool needs to have its sanity checked,
        # or certain language files stand in for true templates.
        if project.lang == project.templates_lang:
            msg.manual_comment[:] = []
            msg.unfuzzy()
            if msg.msgid_plural is None:
                msg.msgstr[:] = [u""]
            else:
                msg.msgstr[:] = [u"", u""]

        # Construct branch message with extended key.
        xkmsg = extkey_msg(msg)

        # Do not gather messages belonging to depending summit catalogs.
        in_dep = False
        for dep_summit_cat in dep_summit_cats:
            if msg in dep_summit_cat or xkmsg in dep_summit_cat:
                in_dep = True
                break
        if in_dep:
            continue

        # If the summit message for the original branch message exists,
        # but their extended keys do not match,
        # switch to branch message with extended key.
        summit_msg = summit_cat.get(msg)
        if summit_msg and extkey_msg(summit_msg).key != xkmsg.key:
            xkpairs.append((msg, xkmsg))
            msg = xkmsg
            summit_msg = summit_cat.get(msg)

        # Collect the branch message for merging or insertion.
        if summit_msg is not None:
            msgs_to_merge.append((msg, summit_msg))
        else:
            msgs_to_insert.append(msg)

    # If some messages had to have extended keys, update branch catalog.
    if xkpairs:
        for msg, xkmsg in xkpairs:
            branch_cat.remove_on_sync(msg)
            branch_cat.add_last(xkmsg)
        branch_cat.sync_map()

    # Merge messages already in the summit catalog.
    if msgs_to_merge:
        for msg, summit_msg in msgs_to_merge:
            # Merge the message.
            gather_merge_msg(summit_msg, msg)
            # Update automatic comments.
            summit_override_auto(summit_msg, msg, branch_id, is_primary)
            # Attach any new summit tags to the merged message.
            summit_set_tags(summit_msg, branch_id, project)

    # Insert messages not already in the summit catalog.
    if msgs_to_insert:
        # Pair messages to insert from branch with summit messages
        # having common source files.
        # If summit is empty, this is primary branch catalog, so make
        # only one dummy pair to preserve original ordering of messages.
        summit_msgs_by_src_dict = dict(summit_cat.messages_by_source())
        if summit_msgs_by_src_dict:
            msgs_by_src = branch_cat.messages_by_source()
        else:
            msgs_by_src = [("", branch_cat)]

        # Collect possible source file synonyms to those in the summit catalog.
        fnsyn = branch_cat.detect_renamed_sources(summit_cat)

        # Prepare messages for insertion into summit.
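        # messages_by_source() is taken to return (source file, messages)
        # pairs, e.g. (hypothetical):
        #   [("src/dialog.cpp", [msg1, msg2]), ("src/widget.cpp", [msg3])]
        # which is what drives the source-by-source insertion below.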
        summit_msg_by_msg = {}
        for msg in msgs_to_insert:
            update_progress()
            summit_msg = MessageType(msg)
            summit_set_tags(summit_msg, branch_id, project)
            summit_msg_by_msg[msg] = summit_msg

        # Insert branch messages into summit source by source.
        for src, msgs in msgs_by_src:

            # Assemble collection of summit messages from same source file.
            summit_msgs = []
            for osrc in [src] + fnsyn.get(src, []):
                summit_msgs.extend(summit_msgs_by_src_dict.get(osrc, []))

            # If existing summit messages from same source found,
            # insert branch messages around those summit messages.
            # Otherwise, just append them at the end.
            if summit_msgs:

                # Assemble groups of messages by same msgid and same msgctxt,
                # for insertion by similarity.
                if not insert_nosim:
                    smsgs_by_msgid = {}
                    smsgs_by_msgctxt = {}
                    for smsg in summit_msgs:
                        if smsg.msgid not in smsgs_by_msgid:
                            smsgs_by_msgid[smsg.msgid] = []
                        smsgs_by_msgid[smsg.msgid].append(smsg)
                        if smsg.msgctxt is not None:
                            if smsg.msgctxt not in smsgs_by_msgctxt:
                                smsgs_by_msgctxt[smsg.msgctxt] = []
                            smsgs_by_msgctxt[smsg.msgctxt].append(smsg)

                insertions = []
                for msg in msgs:
                    update_progress()
                    new_summit_msg = summit_msg_by_msg.get(msg)
                    if new_summit_msg is None:
                        continue

                    # Existing summit message to where (after or before)
                    # current message is to be inserted.
                    summit_msg_ref = None
                    before = False

                    # Try to insert message by similarity.
                    # Similarity is checked by groups,
                    # such that for each group there is a message part
                    # which is compared for similarity.
                    if not insert_nosim:
                        for summit_msgs_group, matt, forceins in (
                            (smsgs_by_msgid.get(msg.msgid), "msgctxt", True),
                            (smsgs_by_msgctxt.get(msg.msgctxt), "msgid", True),
                            (summit_msgs, "key", False),
                        ):
                            if not summit_msgs_group:
                                continue

                            # Shortcut: if only one summit message in the group
                            # and insertion forced, insert after it.
                            if len(summit_msgs_group) == 1 and forceins:
                                summit_msg_ref = summit_msgs_group[-1]
                                break

                            # Does the message have the part to be matched?
                            mval = msg.get(matt)
                            if mval is None:
                                continue

                            # Find existing message with the most similar
                            # matching attribute.
                            seqm = SequenceMatcher(None, mval, "")
                            maxr = 0.0
                            for summit_msg in summit_msgs_group:
                                smval = summit_msg.get(matt)
                                if smval is None:
                                    continue
                                seqm.set_seq2(smval)
                                r = seqm.ratio()
                                if maxr <= r:
                                    maxr = r
                                    maxr_summit_msg = summit_msg

                            # If similar enough message has been found,
                            # set insertion position after it.
                            # Otherwise, insert after last summit message
                            # in the group if insertion forced.
                            if maxr > 0.6:
                                summit_msg_ref = maxr_summit_msg
                                break
                            elif forceins:
                                summit_msg_ref = summit_msgs_group[-1]
                                break

                    # If no similar existing message, set position before
                    # the summit message with first greater source reference
                    # line number, if any such.
                    if summit_msg_ref is None and src:
                        for summit_msg in summit_msgs:
                            if msg.source[0][1] < summit_msg.source[0][1]:
                                summit_msg_ref = summit_msg
                                before = True
                                break

                    # If not insertion by source references, insert last.
                    if summit_msg_ref is None:
                        summit_msg_ref = summit_msgs[-1]

                    # Record insertion.
                    pos = summit_cat.find(summit_msg_ref)
                    if not before:
                        pos += 1
                    insertions.append((new_summit_msg, pos))

                # Insert ordered messages into catalog.
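                # insertions is a list of (message, position) pairs, e.g.
                # (hypothetical) [(msg_a, 12), (msg_b, 30)]; add_more() is
                # taken to apply all recorded positions in a single pass.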
                summit_cat.add_more(insertions)

            else:
                for msg in msgs:
                    update_progress()
                    new_summit_msg = summit_msg_by_msg.get(msg)
                    if new_summit_msg is not None:
                        summit_cat.add_last(new_summit_msg)


def gather_merge_msg (summit_msg, msg):

    if summit_msg.key != msg.key:
        error(_("@info",
                "Cannot gather messages with different keys."))
    if (summit_msg.msgid_plural is None) != (msg.msgid_plural is None):
        error(_("@info",
                "Cannot gather messages with different plurality."))

    if (   (summit_msg.translated and msg.translated)
        or (summit_msg.fuzzy and msg.fuzzy)
        or (summit_msg.untranslated and msg.untranslated)
    ):
        if not summit_msg.manual_comment:
            summit_msg.manual_comment = Monlist(msg.manual_comment)
        if msg.msgid_plural is not None:
            summit_msg.msgid_plural = msg.msgid_plural
        summit_msg.msgstr = Monlist(msg.msgstr)

    elif summit_msg.fuzzy and msg.translated:
        summit_msg.manual_comment = Monlist(msg.manual_comment)
        if summit_msg.msgid_plural is None or msg.msgid_plural is not None:
            if msg.msgid_plural is not None:
                summit_msg.msgid_plural = msg.msgid_plural
            summit_msg.msgstr = Monlist(msg.msgstr)
            if summit_msg.msgid_plural == msg.msgid_plural:
                summit_msg.unfuzzy()

    elif summit_msg.untranslated and (msg.translated or msg.fuzzy):
        summit_msg.manual_comment = Monlist(msg.manual_comment)
        if summit_msg.msgid_plural is None or msg.msgid_plural is not None:
            if msg.fuzzy:
                summit_msg.msgctxt_previous = msg.msgctxt_previous
                summit_msg.msgid_previous = msg.msgid_previous
                summit_msg.msgid_plural_previous = msg.msgid_plural_previous
            if msg.msgid_plural is not None:
                summit_msg.msgid_plural = msg.msgid_plural
            summit_msg.msgstr = Monlist(msg.msgstr)
            summit_msg.fuzzy = msg.fuzzy


def summit_gather_single_header (summit_cat, prim_branch_cat,
                                 project, options):

    # Copy over comments from the primary branch catalog.
    hdr = summit_cat.header
    bhdr = prim_branch_cat.header
    hdr.title = bhdr.title
    hdr.copyright = bhdr.copyright
    hdr.license = bhdr.license
    hdr.author = bhdr.author
    hdr.comment = bhdr.comment

    # Copy over standard fields from the primary branch catalog.
    for fname in [x[0] for x in Header().field]:
        fvalue = prim_branch_cat.header.get_field_value(fname)
        if fvalue is not None:
            summit_cat.header.set_field(fname, fvalue)
        else:
            summit_cat.header.remove_field(fname)

    # Copy over non-standard fields from the primary branch catalog on request.
    bfields = []
    for fname in project.header_propagate_fields:
        bfields.extend(prim_branch_cat.header.select_fields(fname))
    cfields = []
    for fname in project.header_propagate_fields:
        cfields.extend(summit_cat.header.select_fields(fname))
    # Replace old with new set if not equal.
    if bfields != cfields:
        for cfield in cfields:
            summit_cat.header.field.remove(cfield)
        for bfield in bfields:
            summit_cat.header.field.append(bfield)


_asc_check_cache = {}

def summit_scatter_single (branch_id, branch_name, branch_subdir,
                           branch_path, summit_paths,
                           project, options, update_progress):

    update_progress()

    # See if the branch catalog is to be newly created from the template.
    new_from_template = False
    branch_path_mod = branch_path
    if branch_path in project.add_on_scatter:
        new_from_template = True
        # Initialize new catalog with messages directly from the template.
        # Later the catalog file name will be switched to branch path,
        # if the catalog satisfies criteria to be created on scatter.
        branch_path_mod = project.add_on_scatter[branch_path]

    # Open the branch catalog and all summit catalogs.
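    # (Per the add_on_scatter check above, branch_path_mod now points either
    # at the existing branch catalog or, for a catalog to be vivified, at
    # its template, e.g. (hypothetical) "templates/ui.pot".)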
    try:
        branch_cat = Catalog(branch_path_mod, wrapping=project.branches_wrapping)
    except PologyError, e:
        warning(_("@info",
                  "Cannot open the branch catalog '%(file)s' "
                  "to scatter to. The error was:\n"
                  "%(msg)s",
                  file=branch_path_mod, msg=str_to_unicode(str(e))))
        return

    summit_cats = []
    for summit_path in summit_paths:
        try:
            # NOTE: Must be opened monitored to have compatible types
            # when copying message parts to branch message.
            summit_cat = Catalog(summit_path)
        except PologyError, e:
            warning(_("@info",
                      "Cannot open the summit catalog '%(file)s' "
                      "to scatter from. The error was:\n"
                      "%(msg)s",
                      file=summit_path, msg=str_to_unicode(str(e))))
            return
        summit_cats.append(summit_cat)

    # Collect and link ascription catalogs to summit catalogs.
    # (Do not open them here, but only later when a check is not cached.)
    if project.ascription_filter:
        aconfs_acats = {}
        for summit_cat in summit_cats:
            aconf, acatpath = project.aconfs_acatpaths[summit_cat.name]
            aconfs_acats[summit_cat.name] = (aconf, None, acatpath)
            if acatpath not in _asc_check_cache:
                _asc_check_cache[acatpath] = {}

    # Pair branch messages with summit messages.
    msgs_total = 0
    msgs_translated = 0
    msg_links = []
    asc_stopped = 0
    for branch_msg in branch_cat:
        update_progress()

        # Skip obsolete messages.
        if branch_msg.obsolete:
            continue
        msgs_total += 1

        # If there is a hook on branch messages on gather,
        # it must be used here to prepare branch message for lookup
        # in summit catalog, as the hook may modify the key.
        branch_msg_lkp = branch_msg
        if project.hook_on_gather_msg_branch:
            branch_msg_lkp = MessageUnsafe(branch_msg)
            exec_hook_msg(branch_id, branch_name, branch_subdir,
                          branch_msg_lkp, branch_cat,
                          project.hook_on_gather_msg_branch)

        # Construct branch message for lookup with extended key.
        branch_xkmsg_lkp = extkey_msg(branch_msg_lkp)

        # Find first summit catalog which has this message translated.
        summit_msg = None
        for summit_cat in summit_cats:
            # Branch message with extended key must be looked up first.
            for bmsg_lkp in [branch_xkmsg_lkp, branch_msg_lkp]:
                if bmsg_lkp in summit_cat:
                    summit_msg = summit_cat[bmsg_lkp]
                    if summit_msg.obsolete:
                        summit_msg = None
                    else:
                        break
            if summit_msg is not None:
                break

        if summit_msg is None:
            report_on_msg(_("@info:progress",
                            "Message not in the summit."),
                          branch_msg, branch_cat)
            continue

        if (    project.ascription_filter and not options.force
            and do_scatter(summit_msg, branch_msg)
        ):
            aconf, acat, acatpath = aconfs_acats[summit_cat.name]
            if summit_msg.key not in _asc_check_cache[acatpath]:
                if acat is None:
                    acat = Catalog(acatpath, monitored=False, create=True)
                    aconfs_acats[summit_cat.name] = (aconf, acat, acatpath)
                hfilter = project.ascription_history_filter
                ahist = collect_ascription_history(summit_msg, acat, aconf,
                                                   nomrg=True,
                                                   hfilter=hfilter)
                afilter = project.ascription_filter
                res = afilter(summit_msg, summit_cat, ahist, aconf)
                _asc_check_cache[acatpath][summit_msg.key] = res
            if not _asc_check_cache[acatpath][summit_msg.key]:
                asc_stopped += 1
                continue

        if summit_msg.translated:
            msgs_translated += 1
        msg_links.append((branch_msg, summit_msg, summit_cat))

    if asc_stopped > 0:
        warning(n_("@info:progress",
                   "%(file)s: %(num)d message stopped by ascription filter.",
                   "%(file)s: %(num)d messages stopped by ascription filter.",
                   file=branch_path, num=asc_stopped))

    # If completeness is below the minimal acceptable, remove all translations.
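    # Worked example (hypothetical numbers): 120 of 400 active messages
    # translated gives completeness_ratio 0.3, so with
    # scatter_acc_completeness = 0.5 every translation in the branch
    # catalog would be cleared below.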
    if msgs_total > 0:
        completeness_ratio = float(msgs_translated) / msgs_total
    else:
        completeness_ratio = 1.0
    if (    completeness_ratio < project.scatter_acc_completeness
        and not options.force
    ):
        for branch_msg in branch_cat:
            if branch_msg.obsolete:
                branch_cat.remove_on_sync(branch_msg)
            else:
                clear_msg(branch_msg)

    # If complete enough, scatter from summit to branch messages.
    else:
        scattered_branch_msgs = set()
        for branch_msg, summit_msg, summit_cat in msg_links:
            update_progress()

            if do_scatter(summit_msg, branch_msg):
                exec_hook_msg(branch_id, branch_name, branch_subdir,
                              summit_msg, summit_cat,
                              project.hook_on_scatter_msg)

                # NOTE: Same plurality and equal msgid_plural fields
                # between summit and branch message are enforced,
                # so only assert this for robustness.
                if summit_msg.msgid_plural != branch_msg.msgid_plural:
                    error(_("@info",
                            "Cannot scatter messages with "
                            "different plurality."))

                for i in range(len(summit_msg.msgstr)):
                    piped_msgstr = exec_hook_msgstr(
                        branch_id, branch_name, branch_subdir,
                        summit_msg.msgstr[i], summit_msg, summit_cat,
                        project.hook_on_scatter_msgstr)
                    if i < len(branch_msg.msgstr):
                        branch_msg.msgstr[i] = piped_msgstr
                    else:
                        branch_msg.msgstr.append(piped_msgstr)
                branch_msg.unfuzzy()
                branch_msg.manual_comment = summit_msg.manual_comment
                scattered_branch_msgs.add(branch_msg)

        # Fuzzy all active messages which were not scattered,
        # in order to avoid stale translations in branches.
        for branch_msg in branch_cat:
            if branch_msg.active and branch_msg not in scattered_branch_msgs:
                branch_msg.fuzzy = True

    # Update branch header based on primary summit catalog.
    # Copy over all header parts from summit to branch,
    # except for those copied from template on merging.
    hdr = branch_cat.header
    shdr = summit_cats[0].header
    # Fields to keep due to being copied over on merging.
    keep_fields = [
        "Report-Msgid-Bugs-To",
        "POT-Creation-Date",
    ]
    # Fields to keep if no branch message was modified.
    if not branch_cat.modcount and branch_cat.header.initialized:
        keep_fields.extend([
            "PO-Revision-Date",
            "Last-Translator",
        ])
    # Fields to keep due to explicitly being told to.
    keep_fields.extend(project.header_skip_fields_on_scatter)

    # Update comments.
    hdr.title = shdr.title
    hdr.copyright = shdr.copyright
    hdr.license = shdr.license
    hdr.author = shdr.author
    hdr.comment = shdr.comment

    # Update fields only if normalized lists of fields do not match.
    if normhf(hdr.field, keep_fields) != normhf(shdr.field, keep_fields):
        # Collect branch fields to be preserved.
        preserved_fs = []
        for fnam in keep_fields:
            selected_fs = branch_cat.header.select_fields(fnam)
            preserved_fs.append(selected_fs[0] if selected_fs
                                else (fnam, None))
        # Overwrite branch with summit header fields.
        hdr.field = shdr.field
        # Put back the preserved branch fields.
        for fnam, fval in preserved_fs:
            if fval is not None:
                hdr.set_field(fnam, fval)
            else:
                hdr.remove_field(fnam)

    # Apply hooks to the branch catalog.
    exec_hook_cat(branch_id, branch_name, branch_subdir, branch_cat,
                  project.hook_on_scatter_cat)

    # If the branch catalog has been newly created,
    # see if it is translated enough to be really written out.
    skip_write = False
    if new_from_template and not options.force:
        ntrans = 0
        for msg in branch_cat:
            if msg.translated:
                ntrans += 1
        if len(branch_cat) > 0:
            skip_write = (    float(ntrans) / len(branch_cat) + 1e-6
                          < project.scatter_min_completeness)
        else:
            skip_write = False

    if new_from_template and not skip_write:
        # Create any needed subdirectories and set destination branch path.
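        # Until now the vivified catalog carried the template path from
        # add_on_scatter, e.g. (hypothetical) "templates/ui.pot"; redirect
        # it to the real branch path before syncing.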
        mkdirpath(os.path.dirname(branch_path))
        branch_cat.filename = branch_path

    # Commit changes to the branch catalog.
    if not skip_write and (branch_cat.sync() or options.force):

        # Apply hooks to branch catalog file.
        exec_hook_file(branch_id, branch_name, branch_subdir,
                       branch_cat.filename, project.hook_on_scatter_file)

        # Add to version control.
        if (    project.branches_vcs
            and not project.bdict[branch_id].skip_version_control
        ):
            if not project.branches_vcs.add(branch_cat.filename):
                warning(_("@info",
                          "Cannot add '%(file)s' to version control.",
                          file=branch_cat.filename))

        paths_str = " ".join(summit_paths)
        if options.verbose:
            if new_from_template:
                actype = _("@item:intext action performed on a catalog",
                           "scattered-added")
                report("<+ (%s) %s %s" % (actype, branch_cat.filename, paths_str))
            else:
                actype = _("@item:intext action performed on a catalog",
                           "scattered")
                report("< (%s) %s %s" % (actype, branch_cat.filename, paths_str))
        elif not options.quiet:
            if new_from_template:
                report("<+ %s %s" % (branch_cat.filename, paths_str))
            else:
                report("< %s %s" % (branch_cat.filename, paths_str))


def do_scatter (smsg, bmsg):

    return smsg.translated


def hook_applicable (branch_check, branch_id, name_check, name, subdir):

    if branch_check is not None:
        if hasattr(branch_check, "__call__"):
            if not branch_check(branch_id):
                return False
        else:
            if not re.search(branch_check, branch_id):
                return False

    if name_check is not None:
        if hasattr(name_check, "__call__"):
            if not name_check(name, subdir):
                return False
        else:
            if not re.search(name_check, name):
                return False

    return True


# Pipe msgstr through hook calls,
# for which branch id and catalog name match hook specification.
def exec_hook_msgstr (branch_id, branch_name, branch_subdir,
                      msgstr, msg, cat, hooks):

    piped_msgstr = msgstr
    for call, branch_ch, name_ch in hooks:
        if hook_applicable(branch_ch, branch_id, name_ch,
                           branch_name, branch_subdir):
            piped_msgstr_tmp = call(piped_msgstr, msg, cat)
            if isinstance(piped_msgstr_tmp, basestring):
                piped_msgstr = piped_msgstr_tmp

    return piped_msgstr


# Pipe message through hook calls,
# for which branch id and catalog name match hook specification.
def exec_hook_msg (branch_id, branch_name, branch_subdir, msg, cat, hooks):

    # Apply all hooks to the message.
    for call, branch_ch, name_ch in hooks:
        if hook_applicable(branch_ch, branch_id, name_ch,
                           branch_name, branch_subdir):
            call(msg, cat)


# Pipe header through hook calls,
# for which branch id and catalog name match hook specification.
def exec_hook_head (branch_id, branch_name, branch_subdir, hdr, cat, hooks):

    # Apply all hooks to the header.
    for call, branch_ch, name_ch in hooks:
        if hook_applicable(branch_ch, branch_id, name_ch,
                           branch_name, branch_subdir):
            call(hdr, cat)


# Pipe catalog through hook calls,
# for which branch id and catalog name match hook specification.
def exec_hook_cat (branch_id, branch_name, branch_subdir, cat, hooks):

    # Apply all hooks to the catalog.
    for call, branch_ch, name_ch in hooks:
        if hook_applicable(branch_ch, branch_id, name_ch,
                           branch_name, branch_subdir):
            call(cat)


# Pipe catalog file through hook calls,
# for which branch id and catalog name match hook specification.
def exec_hook_file (branch_id, branch_name, branch_subdir, filepath, hooks):

    # Make temporary backup of the file.
    # FIXME: Portable construction of temporary file.
    bckppath = "/tmp/backup%s-%s" % (os.getpid(), os.path.basename(filepath))
    shutil.copyfile(filepath, bckppath)

    # Apply all hooks to the file, but stop if one returns non-zero status.
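    # File hooks follow the shell convention of returning 0 on success,
    # non-zero on failure; a minimal sketch (hypothetical hook):
    #   def strip_trailing_blanks (path):
    #       lines = open(path).readlines()
    #       open(path, "w").writelines([l.rstrip() + "\n" for l in lines])
    #       return 0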
    failed = False
    for call, branch_ch, name_ch in hooks:
        if hook_applicable(branch_ch, branch_id, name_ch,
                           branch_name, branch_subdir):
            if call(filepath) != 0:
                failed = True
                break

    # If any hook failed, retrieve the temporary copy.
    if failed:
        shutil.move(bckppath, filepath)
    else:
        os.unlink(bckppath)


# Pipe branch through hook calls,
# for which branch id matches hook specification.
def exec_hook_branch (branch_id, hooks):

    # Apply all hooks to the branch, but stop if one returns non-zero status.
    failed = False
    for call, branch_ch, d1 in hooks:
        if hook_applicable(branch_ch, branch_id, None, None, None):
            if call(branch_id) != 0:
                failed = True
                break


def find_summit_comment (msg, summit_tag):

    i = 0
    for c in msg.auto_comment:
        if c.startswith(summit_tag):
            return i
        i += 1
    return -1


def get_summit_comment (msg, summit_tag, default=u""):

    p = find_summit_comment(msg, summit_tag)
    if p >= 0:
        return msg.auto_comment[p][len(summit_tag):].strip()
    else:
        return default


def set_summit_comment (msg, summit_tag, text):

    ctext = unicode(summit_tag + " " + text.strip())
    p = find_summit_comment(msg, summit_tag)
    if p >= 0:
        msg.auto_comment[p] = ctext
    else:
        msg.auto_comment.append(ctext)


_summit_tag_branchid = "+>"
_summit_tag_kwprop = "+:"
_summit_tags = (
    _summit_tag_branchid,
    _summit_tag_kwprop,
)

def summit_set_tags (msg, branch_id, project):

    # Add branch ID.
    branch_ids = get_summit_comment(msg, _summit_tag_branchid, "").split()
    if branch_id not in branch_ids:
        branch_ids.append(branch_id)
    set_summit_comment(msg, _summit_tag_branchid, " ".join(branch_ids))


def summit_override_auto (summit_msg, branch_msg, branch_id, is_primary):

    # Copy auto/source/flag comments only if this is the primary branch
    # for the current message.
    if is_primary:

        # Equalize flags, except the fuzzy.
        for fl in branch_msg.flag:
            if fl != "fuzzy":
                summit_msg.flag.add(fl)
        for fl in summit_msg.flag:
            if fl != "fuzzy" and fl not in branch_msg.flag:
                summit_msg.flag.remove(fl)

        # Equalize source references.
        # FIXME: Once there is a way to reliably tell the root directory
        # of source references, add missing and remove obsolete source
        # references instead.
        summit_msg.source = Monlist(map(Monpair, branch_msg.source))

        # Split auto comments of the current summit message into
        # summit and non-summit tagged comments.
        # Also of the branch message, in case it has summit-alike comments.
        summit_nscmnts, summit_scmnts = split_summit_comments(summit_msg)
        branch_nscmnts, branch_scmnts = split_summit_comments(branch_msg)

        # Override auto comments only if they differ overall
        # (which need not be the case, due to double fresh/old insertion)
        # and non-summit auto comments of the current summit message
        # differ from the branch message auto comments.
        if (    summit_msg.auto_comment != branch_msg.auto_comment
            and summit_nscmnts != branch_nscmnts
        ):
            summit_msg.auto_comment = Monlist(branch_msg.auto_comment)
            summit_msg.auto_comment.extend(summit_scmnts)


def split_summit_comments (msg):

    non_summit_comments = []
    summit_comments = []
    for comment in msg.auto_comment:
        wlst = comment.split()
        if wlst and wlst[0] in _summit_tags:
            summit_comments.append(comment)
        else:
            non_summit_comments.append(comment)

    return non_summit_comments, summit_comments


def summit_merge_single (branch_id, catalog_name, catalog_subdir,
                         catalog_path, template_path,
                         wrapping, fuzzy_merging,
                         project, options, update_progress):

    update_progress()

    # Gather the summit template in summit-over-dynamic-templates mode.
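    # (In this mode there is no static template tree: the template summit
    # catalog is regenerated on the fly by the gather pass below, driven by
    # the shadow template project in project.tproject.)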
    if project.templates_dynamic and branch_id == SUMMIT_ID:
        summit_gather_single(catalog_name, project.tproject, project.toptions,
                             update_progress=update_progress)

    # FIXME: Portable construction of temporary file.
    tmp_path = os.path.join("/tmp", (  os.path.basename(catalog_path)
                                     + "~merged-%d" % os.getpid()))

    # Whether to create pristine catalog from template.
    vivified = catalog_path in project.add_on_merge

    # Skip calling msgmerge if template creation dates exist and are equal.
    do_msgmerge = True
    if not vivified and not project.templates_dynamic and not options.force:
        hdr = Catalog(catalog_path, monitored=False, headonly=True).header
        thdr = Catalog(template_path, monitored=False, headonly=True).header
        pcd = hdr.get_field_value("POT-Creation-Date")
        tpcd = thdr.get_field_value("POT-Creation-Date")
        do_msgmerge = (not pcd or not tpcd or pcd != tpcd)

    header_prop_fields = project.header_propagate_fields

    # Should merged catalog be opened, and in what mode?
    do_open = False
    headonly = False
    monitored = False
    otherwrap = set(wrapping).difference(["basic"])
    if otherwrap or project.hook_on_merge_msg or project.hook_on_merge_cat:
        do_open = True
    elif header_prop_fields or project.hook_on_merge_head or vivified:
        do_open = True
        headonly = True
    if (   header_prop_fields or vivified
        or project.hook_on_merge_head or project.hook_on_merge_msg
        or project.hook_on_merge_cat
    ):
        monitored = True

    # Should template catalog be opened too?
    do_open_template = False
    if header_prop_fields or vivified:
        do_open_template = True

    cat = None
    if do_msgmerge:
        # Create the temporary merged catalog.
        minasfz, refuzzy = 0.0, False
        cmppaths, fuzzex, minwnex = [], False, 0
        if branch_id == SUMMIT_ID:
            minasfz = project.merge_min_adjsim_fuzzy
            refuzzy = project.merge_rebase_fuzzy
            if project.compendium_on_merge:
                cmppaths.append(project.compendium_on_merge)
                fuzzex = project.compendium_fuzzy_exact
                minwnex = project.compendium_min_words_exact
        catalog_path_mod = catalog_path
        if vivified:
            if cmppaths:
                catalog_path_mod = "/dev/null"
            else:
                catalog_path_mod = tmp_path
                shutil.copyfile(template_path, tmp_path)

        getcat = do_open and not headonly
        ignpotdate = project.templates_dynamic

        cat = merge_pofile(catalog_path_mod, template_path, outpath=tmp_path,
                           wrapping=wrapping, fuzzymatch=fuzzy_merging,
                           minasfz=minasfz, refuzzy=refuzzy,
                           cmppaths=cmppaths, fuzzex=fuzzex, minwnex=minwnex,
                           getcat=getcat, monitored=monitored,
                           ignpotdate=ignpotdate,
                           quiet=True, abort=False)
        if cat is None:
            warning(_("@info",
                      "Catalog '%(file1)s' not merged with "
                      "template '%(file2)s' due to errors on merging.",
                      file1=catalog_path_mod, file2=template_path))
            return
        elif not getcat:
            # Catalog not requested, so the return value is True
            # indicating that the merge succeeded.
            cat = None

    else:
        # Copy current to temporary catalog, to be processed by hooks, etc.
        shutil.copyfile(catalog_path, tmp_path)

    # Save time by opening the merged catalog only if necessary,
    # and only as much as necessary.

    # Open catalogs as necessary.
    if do_open:
        update_progress()

        if cat is None:
            cat = Catalog(tmp_path, monitored=monitored,
                          wrapping=wrapping, headonly=headonly)

        if do_open_template:
            tcat = Catalog(template_path, monitored=False, headonly=True)

    # Initialize header if the catalog has been vivified from template.
    if vivified:
        hdr = cat.header
        hdr.title = Monlist()
        hdr.copyright = u""
        hdr.license = u""
        hdr.author = Monlist()
        hdr.comment = Monlist()

        # Get the project ID from template;
        # if it gives default value, use catalog name instead.
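        # For instance, a freshly generated template usually carries
        #   Project-Id-Version: PACKAGE VERSION
        # which the "PACKAGE" test below rejects in favour of catalog_name.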
        projid = tcat.header.get_field_value("Project-Id-Version")
        if not projid or "PACKAGE" in projid:
            projid = catalog_name
        hdr.set_field(u"Project-Id-Version", unicode(projid))
        rdate = time.strftime("%Y-%m-%d %H:%M%z")
        hdr.set_field(u"PO-Revision-Date", unicode(rdate))
        hdr.set_field(u"Last-Translator", unicode(project.vivify_w_translator))
        hdr.set_field(u"Language-Team", unicode(project.vivify_w_langteam))
        if project.vivify_w_language:
            hdr.set_field(u"Language", unicode(project.vivify_w_language),
                          after="Language-Team", reorder=True)
        hdr.set_field(u"Content-Type",
                      u"text/plain; charset=%s" % project.vivify_w_charset)
        hdr.set_field(u"Content-Transfer-Encoding", u"8bit")
        if project.vivify_w_plurals:
            hdr.set_field(u"Plural-Forms", unicode(project.vivify_w_plurals))
        else:
            hdr.remove_field(u"Plural-Forms")

    # Propagate requested header fields.
    if header_prop_fields:
        # Preserve order of the fields when collecting.
        fields = []
        for field in cat.header.field:
            if field[0] in header_prop_fields:
                fields.append(field)
        tfields = []
        for tfield in tcat.header.field:
            if tfield[0] in header_prop_fields:
                tfields.append(tfield)
        # Replace the field sequence if not equal to that of the template.
        if fields != tfields:
            for field in fields:
                cat.header.field.remove(field)
            for tfield in tfields:
                cat.header.field.append(tfield)

    # Set original instead of temporary file path -- hooks may expect it.
    if cat is not None:
        cat.filename = catalog_path

    # Execute header hooks.
    if project.hook_on_merge_head:
        exec_hook_head(branch_id, catalog_name, catalog_subdir,
                       cat.header, cat, project.hook_on_merge_head)

    # Execute message hooks.
    if project.hook_on_merge_msg:
        for msg in cat:
            exec_hook_msg(branch_id, catalog_name, catalog_subdir,
                          msg, cat, project.hook_on_merge_msg)

    # Execute catalog hooks.
    if project.hook_on_merge_cat:
        exec_hook_cat(branch_id, catalog_name, catalog_subdir,
                      cat, project.hook_on_merge_cat)

    # Synchronize merged catalog if it has been opened.
    if cat is not None:
        cat.filename = tmp_path # not to overwrite original file
        cat.sync(force=otherwrap)

    # Execute file hooks.
    if project.hook_on_merge_file:
        cat_name = os.path.basename(tmp_path)
        cat_name = cat_name[:cat_name.rfind(".po")]
        exec_hook_file(branch_id, cat_name, catalog_subdir, tmp_path,
                       project.hook_on_merge_file)

    # If there is any difference between the merged and the old catalog.
    if vivified or not filecmp.cmp(catalog_path, tmp_path):

        # Assert correctness of the merged catalog and move over the old.
        assert_system("msgfmt -c -o/dev/null %s" % tmp_path)
        added = False
        if vivified:
            added = True
            mkdirpath(os.path.dirname(catalog_path))
        shutil.move(tmp_path, catalog_path)

        # Add to version control if not already added.
        vcs = (project.summit_vcs if branch_id == SUMMIT_ID
               else project.branches_vcs)
        if (    vcs
            and (   branch_id == SUMMIT_ID
                 or not project.bdict[branch_id].skip_version_control)
            and not vcs.is_versioned(catalog_path)
        ):
            if not vcs.add(catalog_path):
                warning(_("@info",
                          "Cannot add '%(file)s' to version control.",
                          file=catalog_path))

        if options.verbose:
            if added:
                actype = _("@item:intext action performed on a catalog",
                           "merged-added")
                report(".+ (%s) %s" % (actype, catalog_path))
            else:
                actype = _("@item:intext action performed on a catalog",
                           "merged")
                report(". (%s) %s" % (actype, catalog_path))
        elif not options.quiet:
            if added:
                report(".+ %s" % catalog_path)
            else:
                report(". %s" % catalog_path)

    # Remove the temporary merged catalog.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)


# Put header fields in canonical form, for equality checking.
# Returns ordered list of (field name, field value).
def normhf (fields, excluded=[]):

    nfs = []
    for fnam, fval in fields:
        if fnam not in excluded:
            nfs.append((fnam, fval))
    nfs.sort()
    return nfs


# Remove all translator-related elements from the message.
def clear_msg (msg):

    msg.unfuzzy()
    msg.msgstr[:] = [u""] * len(msg.msgstr)
    msg.manual_comment[:] = []

    return msg


if __name__ == '__main__':
    exit_on_exception(main)
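# A typical invocation might look like (hypothetical config path):
#   posummit.py lang/summit-config gather
# with the operation mode dispatched by main() to the summit_*_single
# workers above.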