Index: branches/work/doc/wiki2docbook.py =================================================================== --- branches/work/doc/wiki2docbook.py (revision 1554863) +++ branches/work/doc/wiki2docbook.py (revision 1554864) @@ -1,1754 +1,1768 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import re, sys, os, getopt, subprocess headinglevels=['chapter', 'sect1', 'sect2', 'sect3', 'sect4', 'sect5'] #toplevel=1 #sect1 toplevel=0 #chapter userbase_content_marker='\n\n' def usage(): print '\nUsage: python %s [OPTION] path/to/userbasepagedumpfile' %os.path.basename(sys.argv[0]) print ' generates docbook output file with extension *.new.docbook' print 'Options: -h, --help : usage' print ' -t, --toplevel : toplevel heading 0=chapter 1=sect1' print ' -c, --check : check heading levels and print them to stout' print ' -n, --noheaderfooter : no header/footer from template' print ' -r file, --replace file : *.docbook file with kde docbook header + footer' print ' and body from a previous userbase page dump to replace it' print ' if no replace file was found a default simple kde docbook header + footer is used' print ' -s, --skip_download : skip download Userbase images' print ' -l, --locale=locale : choose the locale of the docbook' print 'Default : try to get the userbase data and generate a docbook with a simple header and footer, download images, English' sys.exit(2) try: opts, args = getopt.getopt(sys.argv[1:], "ht:r:cnsl:", ["help", "toplevel=", 'replace=', 'check', 'noheaderfooter', 'skip_download', 'locale']) except getopt.GetoptError: usage() # print help information and exit replacefile='' checklevels=False noheaderfooter=False nodownload=False locale='en' for o,a in opts: if o in ("-h", "--help"): usage() if o in ("-t", "--toplevel"): toplevel=int(a) if o in ("-r", "--replace"): replacefile=a if o in ("-c", "--check"): checklevels=True if o in ("-n", "--noheaderfooter"): noheaderfooter=True if o in ("-s", "--skip_download"): nodownload=True if o in ("-l", 
"--locale"): locale=a if len(args) != 1: usage() inputfile=args[0] if not os.path.isfile(inputfile): sys.exit('input file %s not found' %inputfile) locales={'aa': 'Afar', 'ab': 'Abkhazian', 'ae': 'Avestan', 'af': 'Afrikaans', 'ak': 'Akan', 'am': 'Amharic', 'an': 'Aragonese', 'ar': 'Arabic', 'as': 'Assamese', 'av': 'Avaric', 'ay': 'Aymara', 'az': 'Azerbaijani', 'ba': 'Bashkir', 'be': 'Belarusian', 'bg': 'Bulgarian', 'bh': 'Bihari', 'bi': 'Bislama', 'bm': 'Bambara', 'bn': 'Bengali', 'bo': 'Tibetan', 'br': 'Breton', 'bs': 'Bosnian', 'ca': 'Catalan', 'ce': 'Chechen', 'ch': 'Chamorro', 'co': 'Corsican', 'cr': 'Cree', 'cs': 'Czech', 'cv': 'Chuvash', 'cy': 'Welsh', 'da': 'Danish', 'de': 'German', 'dz': 'Dzongkha', 'ee': 'Ewe', 'el': 'Greek', 'en': 'English', 'eo': 'Esperanto', 'es': 'Spanish', 'et': 'Estonian', 'eu': 'Basque', 'fa': 'Persian', 'ff': 'Fulah', 'fi': 'Finnish', 'fj': 'Fijian', 'fo': 'Faroese', 'fr': 'French', 'fy': 'Frisian', 'ga': 'Irish', 'gd': 'Gaelic', 'gl': 'Galician', 'gn': 'Guarani', 'gu': 'Gujarati', 'gv': 'Manx', 'ha': 'Hausa', 'he': 'Hebrew', 'hi': 'Hindi', 'ho': 'Hiri Motu', 'hr': 'Croatian', 'ht': 'Haitian', 'hu': 'Hungarian', 'hy': 'Armenian', 'hz': 'Herero', 'ia': 'Interlingua', 'id': 'Indonesian', 'ig': 'Igbo', 'ik': 'Inupiaq', 'io': 'Ido', 'is': 'Icelandic', 'it': 'Italian', 'iu': 'Inuktitut', 'ja': 'Japanese', 'jv': 'Javanese', 'ka': 'Georgian', 'kg': 'Kongo', 'kk': 'Kazakh', 'kl': 'Kalaallisut', 'km': 'Khmer', 'kn': 'Kannada', 'ko': 'Korean', 'kr': 'Kanuri', 'ks': 'Kashmiri', 'ku': 'Kurdish', 'kv': 'Komi', 'kw': 'Cornish', 'ky': 'Kyrgyz', 'la': 'Latin', 'lb': 'Luxembourgish', 'lg': 'Ganda', 'li': 'Limburgan', 'ln': 'Lingala', 'lo': 'Lao', 'lt': 'Lithuanian', 'lu': 'Luba-Katanga', 'lv': 'Latvian', 'mg': 'Malagasy', 'mh': 'Marshallese', 'mi': 'Maori', 'mk': 'Macedonian', 'ml': 'Malayalam', 'mn': 'Mongolian', 'mr': 'Marathi', 'ms': 'Malay', 'mt': 'Maltese', 'my': 'Burmese', 'na': 'Nauru', 'nb': 'Norwegian-Bokmal', 'nd': 'North Ndebele', 'ne': 
'Nepali', 'ng': 'Ndonga', 'nl': 'Dutch', 'nn': 'Norwegian-Nynorsk', 'no': 'Norwegian', 'nr': 'South-Ndebele', 'nv': 'Navajo', 'ny': 'Nyanja', 'oc': 'Occitan', 'oj': 'Ojibwa', 'om': 'Oromo', 'or': 'Oriya', 'os': 'Ossetian', 'pa': 'Panjabi', 'pi': 'Pali', 'pl': 'Polish', 'ps': 'Pushto', 'pt': 'Portuguese', 'qu': 'Quechua', 'rn': 'Rundi', 'ro': 'Romanian', 'ru': 'Russian', 'rw': 'Kinyarwanda', 'sa': 'Sanskrit', 'sc': 'Sardinian', 'sd': 'Sindhi', 'se': 'Northern Sami', 'sg': 'Sango', 'si': 'Sinhalese', 'sk': 'Slovak', 'sl': 'Slovenian', 'sm': 'Samoan', 'sn': 'Shona', 'so': 'Somali', 'sq': 'Albanian', 'sr': 'Serbian', 'ss': 'Swati', 'st': 'Southern Sotho', 'su': 'Sundanese', 'sv': 'Swedish', 'sw': 'Swahili', 'ta': 'Tamil', 'te': 'Telugu', 'tg': 'Tajik', 'th': 'Thai', 'ti': 'Tigrinya', 'tk': 'Turkmen', 'tl': 'Tagalog', 'tn': 'Tswana', 'to': 'Tonga', 'tr': 'Turkish', 'ts': 'Tsonga', 'tt': 'Tatar', 'tw': 'Twi', 'ty': 'Tahitian', 'uk': 'Ukrainian', 'ur': 'Urdu', 'uz': 'Uzbek', 've': 'Venda', 'vi': 'Vietnamese', 'vo': 'Volapük', 'wa': 'Walloon', 'wo': 'Wolof', 'xh': 'Xhosa', 'yi': 'Yiddish', 'yo': 'Yoruba', 'zh': 'Chinese', 'zu': 'Zulu'} # The general application names that should not be treated as the section links in any cases genwords=['dolphin', 'krunner', 'kickoff'] docbookheader,docbookfooter='','' if replacefile=='': print 'docbook file has no previous userbase page dump, generating a docbook with simple template header + footer' else: if os.path.isfile(replacefile): try: text=open(replacefile,"r").read() docbookheader,userpagebody,docbookfooter=text.split(userbase_content_marker) except: print 'error reading file to replace, generating a docbook with simple template header + footer' replacefile='' else: print 'docbook file with previous userbase page dump not found, generating a docbook with simple template header + footer' replacefile='' # replace the duplicates duplicates=0 sectionids=[] def sectionheader(text,level,beginmarkup): global sectionids global duplicates 
#output of headinglevels to console if checklevels: print text.rstrip('\n') levelstr='='*level remuster='%s.*?%s' %(levelstr,levelstr) such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): sectiontitle=treffer.replace(levelstr,'') sectiontitle=sectiontitle.strip() sectionid=sectiontitle.replace("'",'') sectionid=sectionid.replace("?",'') sectionid=sectionid.replace("!",'') sectionid=sectionid.replace(",",'') sectionid=sectionid.replace("/",'-') sectionid=sectionid.replace(' ','-') sectionid=sectionid.replace(':','-') sectionid=sectionid.replace(';','-') sectionid=sectionid.replace('.','-') sectionid=sectionid.replace('>','-') sectionid=sectionid.replace('>','') sectionid=sectionid.replace('&','') sectionid=sectionid.replace('&','') # Section ids should not start from a digit sectionid=sectionid.replace('1-','one-') sectionid=sectionid.replace('2-','two-') sectionid=sectionid.replace('3-','three-') sectionid=sectionid.replace('4-','four-') sectionid=sectionid.replace('5-','five-') sectionid=sectionid.replace('6-','six-') sectionid=sectionid.replace('7-','seven-') sectionid=sectionid.replace('8-','eight-') sectionid=sectionid.replace('9-','nine-') sectionid=sectionid.replace('0-','zero-') sectionid=sectionid.replace('','').replace('','') sectionid=sectionid.replace('"','') sectionid=sectionid.replace(' ','') sectionid=sectionid.replace('(','').replace(')','') sectionid=sectionid.lower() if sectionid in sectionids: sectionid+=`duplicates` # work links to sectionid+number still? duplicates+=1 sectionids.append(sectionid) text=text.replace(treffer, '%s id="%s">%s' %(beginmarkup,sectionid,sectiontitle)) return text text=open(inputfile,"r").read() remuster='.*?' 
such=re.compile(remuster,re.DOTALL) if len(such.findall(text))<1: print 'missing userbase timestamp' outtext=userbase_content_marker else: userbase_timestamp='\n' %(sorted(such.findall(text), key=str.lower, reverse=True)[0]) remuster='.*?T' such=re.compile(remuster,re.DOTALL) releasedate=such.findall(text)[0].replace('','').replace('T','') outtext='%s%s' %(userbase_timestamp, userbase_content_marker) if userbase_timestamp in docbookheader: # it is the same page dump version, remove it docbookheader=docbookheader.replace(userbase_timestamp,'') userbase_header='' userbase_header = text.partition('.*?',userbase_header)[0].replace('','').replace('','').partition('/')[0] if replacefile =='': authorname='This documentation was converted from the &kde; UserBase '\ +'%s page.' %(pagename,pagename) docbookheader='\n'\ +'\n \n]>\n\n' % locales.get(locale, 'English') docbookheader+='\nThe %s Handbook\n\n\n\n\ %s\n\n\n\ \n\n\ \n&FDLNotice;\n%s\n\ Applications 18.04\n\n%s\n\ \n\ \nKDE\n\n\n' %(pagename,authorname,releasedate,abstracttext) docbookfooter='\nCredits and License\n\ \nDocumentation Copyright see the UserBase \n\ %s page history\n\ \n&underFDL;\n\n&documentation.index;\n\n' %(pagename,pagename) if releasedate=='':print 'releasedate not found' if userbase_header=='':print 'userbase_header not found' if abstracttext=='':print 'abstracttext not found' #if abstractscreenshotlink=='':print 'abstractscreenshotlink not found' if pagename=='':print 'pagename not found' # lighter versions for docbook UserBase manuals with "Credits and License" section docbookfooterlite='\ \n&underFDL;\n\n&documentation.index;\n\n' docbookfooterlitesection='\ \n&underFDL;\n\n\n&documentation.index;\n\n' #<nowiki>== Subpages of {{FULLPAGENAME}}== #{{Special:PrefixIndex/{{FULLPAGENAME}}/}}</nowiki> nowikimarkup_id,nowikimarkup_idtext=[],[] nowikimarkup_id_marker='nowikimarkup_id_marker__' nowikimarkup_id_number=0 remuster='<nowiki>.*?</nowiki>' #<pre> </pre> such=re.compile(remuster,re.DOTALL) 
for treffer in such.findall(text): if '\n' in treffer: newmarkup='screen' else: newmarkup='command'#'literal' repl=treffer.replace('<nowiki>','<%s>'%newmarkup) repl=repl.replace('</nowiki>',''%newmarkup) nowikimarkup_id_marker_text='nowikimarkup_id_marker__%dx' %nowikimarkup_id_number nowikimarkup_id.append(nowikimarkup_id_marker_text) nowikimarkup_idtext.append(repl) nowikimarkup_id_number+=1 text=text.replace(treffer,nowikimarkup_id_marker_text)#'%s'%repl) #what to do with this? premarkup_id,premarkup_idtext=[],[] premarkup_id_marker='premarkup_id_marker__' premarkup_id_number=0 remuster='<pre>.*?</pre>' #<pre> </pre> such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): if '\n' in treffer: newmarkup='screen' else: newmarkup='command'#'literal' repl=treffer.replace('<pre>','<%s>'%newmarkup) repl=repl.replace('</pre>',''%newmarkup) premarkup_id_marker_text='premarkup_id_marker__%dx' %premarkup_id_number premarkup_id.append(premarkup_id_marker_text) premarkup_idtext.append(repl) premarkup_id_number+=1 text=text.replace(treffer,premarkup_id_marker_text)#'%s'%repl) #what to do with this? #strip off some header footer stuff WRONG!!!! 
remuster='\{\|style.*?\|\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): if 'nowikimarkup_id_marker__' in treffer: such=re.compile('nowikimarkup_id_marker__[0-9]*?x',re.DOTALL) suchtext=such.findall(treffer)[0] text=text.replace(treffer,'nowikimarkup_id_marker__%dx' %nowikimarkup_id.index(suchtext)) elif 'premarkup_id_marker__' in treffer: such=re.compile('premarkup_id_marker__[0-9]*?x',re.DOTALL) suchtext=such.findall(treffer)[0] text=text.replace(treffer,'premarkup_id_marker__%dx' %premarkup_idtext.index(suchtext)) else: text=text.replace(treffer,'') #strip off {{Construction}} remuster='\{\{Construction\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #strip off {{KDE3}} + {{KDE4}} remuster='\{\{KDE[34]\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #strip off {{Being_Edited}} remuster='\{\{Being_Edited\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #strip off {{Community-app-footnote}} remuster='\{\{Community-app-footnote\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #strip off {{Community-app}} remuster='\{\{Community-app\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #strip off {{FULLPAGENAME}} remuster='\{\{FULLPAGENAME\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): #print 1 ,treffer, treffer.replace(treffer,'') text=text.replace(treffer,'') #strip off {{Prevnext2}} parts = text.split("{{Prevnext") for i in range(len(parts)): if parts[i].startswith('2'): parts[i]=parts[i].partition('\n}}')[2] text = "".join(parts) +#strip off {{Plan/Prevnext}} +parts = text.split("{{Plan/Prevnex") +for i in range(len(parts)): + if parts[i].startswith('t'): + parts[i]=parts[i].partition('}}')[2] +text = "".join(parts) + +#no fancy 
tables in Plan +text = text.replace(" border="1" cellpadding="5" cellspacing="0"", "") +text = text.replace("| style="background:#efefef;" |", "!") +text = text.replace("rowspan="2"| ", "") + #{{Special:PrefixIndex/{{FULLPAGENAME}}/}} etc ??? # FULLPAGENAME is already stripped off remuster='\{\{Special:PrefixIndex//\}\}' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #strip off interpage clue parts = text.split("") for i in range(len(parts)): parts[i]=parts[i].rpartition('">')[2] text = "\n".join(parts) # Fix for ">>" buttons text=text.replace('<menuchoice>>></menuchoice>','>>') #Save for the parsing text=text.replace('<menuchoice>','beginguiitem') #Save for the parsing text=text.replace('</menuchoice>','endguiitem') # Fix for keycap text=text.replace('<keycap>','beginkeycap') # Fix for keycap text=text.replace('</keycap>','endkeycap') # Fix for s text=text.replace('<s>','') # Fix for s text=text.replace('</s>','') # Fix for tt text=text.replace('<tt>','') # Fix for tt text=text.replace('</tt>','') # Fix for blockquote text=text.replace('<blockquote>','') # Fix for blockquote text=text.replace('</blockquote>','') # Fix for ref text=text.replace('<ref>','') # Fix for tt text=text.replace('</ref>','') # Fix for tables text=text.replace('<table>','') # Fix for tables text=text.replace('</table>','
') # Fix for tables: break the columns in header text=text.replace('!!','\n!') # Fix for tables: break the columns in rows text=text.replace('||','\n|') # Fix for tables text=text.replace('<th>','') # Fix for tables text=text.replace('</th>','') # Fix for tables text=text.replace('<tr>','') # Fix for tables text=text.replace('</tr>','') # Fix for tables text=text.replace('<td>','') # Fix for code (UserBase rules for usage are uncertain text=text.replace('<code>','') # Fix for code text=text.replace('</code>','') # Fix for UserBase translation workaround text=text.replace('<!--{{-->','') text=text.replace('<!--}}-->','') #The use of is sometimes wrong, strip it off remuster='<translate><!--T:.*?-->\n' such=re.compile(remuster,re.DOTALL) for treffer in such.findall(text): text=text.replace(treffer,'') #The use of is sometimes wrong, strip it off text=text.replace('</translate>','') -# Remove articicial alignment +# Remove artificial alignment text=text.replace('<p align="center">','') text=text.replace('</p>','') #warning #{{warning|1=This is a very dangerous thing to do}} # docbook #tip #{{Tip|Some useful hint}} #info #{{info|1=This is another way to display your information}} # docbook ? 
# Download and parse hardcoded with {{Icon|blah-blah}}, {{Icon1|blah-blah}} and {PositionIcon|blah-blah|16px}} command_template = "wget -qO- 'http://userbase.kde.org/File:%s'|sed 's/images\.userbase\/0\/02\/Flag-red.png//g'|grep -e 'images\.userbase\/[0-9a-z]\/[0-9a-z][0-9a-z]\/'|head -n 1 | sed 's/.*images/images/;s/png.*/png/;s/jpg.*/jpg/;s/jpeg.*/jpeg/'" wget_template = "wget --output-document=%s http://userbase.kde.org/%s --no-verbose > /dev/null 2>&1" def have_application(application): def is_executable(our_path): return os.access(our_path, os.X_OK) and os.path.exists(our_path) for path in os.environ["PATH"].split(os.pathsep): executable = os.path.join(path, application) if is_executable(executable): return True return False def convert_icon(icon_name, icon_width, icon_height): if have_convert: print "Converting %s (good for PDF)..." %icon_name file_name = icon_name.strip('.png') convert_96ppi_command = "convert -units PixelsPerInch %s -size %sx%s -density 96 %s-new.png && mv -f %s-new.png %s" %(icon_name, icon_width, icon_height, file_name, file_name, icon_name) sts = os.system(convert_96ppi_command) else: print "Cannot find convert from ImageMagick in your system. Your image (%s) will not be optimized for PDF conversion." 
%icon_name # Test if we have wget and convert in $PATH have_wget = have_application("wget") have_convert = have_application("convert") have_advpng = have_application("advpng") remuster='\{\{.*?\}\}' such=re.compile(remuster,re.DOTALL) # Replace Icons ans Smileys for treffer in such.findall(text): warningnote=treffer.lstrip('{').rstrip('}') repl='' warningnotesplit=warningnote.partition('|') warningnote,warningnotetext=warningnotesplit[0],warningnotesplit[2] if warningnote.lower()=='smiley': if not nodownload: wget_command = "wget -nc http://userbase.kde.org/images.userbase/7/7c/Face-smile.png" proc = subprocess.Popen(wget_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) proc.communicate() convert_icon('Face-smile.png', 16, 16) repl=' ' elif warningnote.lower()=='plus': if not nodownload: wget_command = "wget -nc http://userbase.kde.org/images.userbase/8/81/Icon-list-add.png" proc = subprocess.Popen(wget_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) proc.communicate() convert_icon('Icon-list-add.png', 16, 16) repl=' ' elif warningnote.lower()=='minus': if not nodownload: wget_command = "wget -nc http://userbase.kde.org/images.userbase/8/8d/Icon-list-remove.png" proc = subprocess.Popen(wget_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) proc.communicate() convert_icon('Icon-list-remove.png', 16, 16) repl=' ' - elif warningnote.lower()=='icon': + elif warningnote.lower()=='icon' or warningnote.lower()=='plan/icon': icontext = 'Icon-' + warningnotetext.strip() + '.png' if not nodownload: if have_wget: download_command = command_template %icontext proc = subprocess.Popen(download_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) image_address = proc.stdout.readline().strip('\n').replace('thumb/','') image_name=image_address.rpartition('/')[2] print 'Downloading %s...' 
%image_name wget_command = wget_template %(image_name,image_address) sts = os.system(wget_command) if have_convert: print "Converting %s to 96 ppi (good for PDF)..." %image_name file_name = image_name.strip('.png') convert_96ppi_command = "convert -units PixelsPerInch %s -density 96 %s-new.png && mv -f %s-new.png %s" %(image_name, file_name, file_name, image_name) sts = os.system(convert_96ppi_command) else: print "Cannot find convert from ImageMagick in your system. Your image (%s) will not be optimized for PDF conversion (96 ppi)." %image_name else: print "Cannot find wget in your system. Please install it correctly to download images." repl=' ' %icontext elif warningnote.lower()=='icon1': icontext = warningnotetext.strip().replace(' ','_') if not nodownload: if have_wget: download_command = command_template %icontext proc = subprocess.Popen(download_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) image_address = proc.stdout.readline().strip('\n').replace('thumb/','') image_name=image_address.rpartition('/')[2] print 'Downloading %s...' %image_name wget_command = wget_template %(image_name,image_address) sts = os.system(wget_command) if have_convert: print "Converting %s to 96 ppi (good for PDF)..." %image_name file_name = image_name.strip('.png') convert_96ppi_command = "convert -units PixelsPerInch %s -density 96 %s-new.png && mv -f %s-new.png %s" %(image_name, file_name, file_name, image_name) sts = os.system(convert_96ppi_command) else: print "Cannot find convert from ImageMagick in your system. Your image (%s) will not be optimized for PDF conversion (96 ppi)." %image_name else: print "Cannot find wget in your system. Please install it correctly to download images." 
repl=' ' %icontext elif warningnote.lower()=='positionicon': iconblock = warningnotetext.strip().partition('|')[0] iconsize = warningnotetext.strip().partition('|')[2].strip('px') icontext = iconblock + '.png' if not nodownload: if have_wget: download_command = command_template %icontext proc = subprocess.Popen(download_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) image_address = proc.stdout.readline().strip('\n').replace('thumb/','') image_name=image_address.rpartition('/')[2] # Strip dots from the filename (breaks PDF) image_name=image_name.rpartition('.')[0].replace('.','-') + '.png' print 'Downloading %s...' %image_name wget_command = wget_template %(image_name,image_address) sts = os.system(wget_command) if have_convert: print "Resizing and converting %s to 96 ppi (good for PDF)..." %image_name file_name = image_name.strip('.png') convert_96ppi_command = "convert -units PixelsPerInch %s -size %sx%s -density 96 %s-new.png && mv -f %s-new.png %s" %(image_name, iconsize, iconsize, file_name, file_name, image_name) sts = os.system(convert_96ppi_command) else: print "Cannot find convert from ImageMagick in your system. Your image (%s) will not be resized and optimized for PDF conversion (96 ppi)." %image_name else: print "Cannot find wget in your system. Please install it correctly to download images." 
repl=' ' %(icontext.rpartition('.')[0].replace('.','-') + '.png') elif warningnote.lower()=='exit': if not nodownload: wget_command = "wget -nc http://userbase.kde.org/images.userbase/9/9f/Icon-application-exit.png" proc = subprocess.Popen(wget_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) proc.communicate() convert_icon('Icon-application-exit.png', 16, 16) repl=' ' elif warningnote.lower()=='configure': if not nodownload: wget_command = "wget -nc http://userbase.kde.org/images.userbase/3/3e/Icon-configure.png" proc = subprocess.Popen(wget_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) proc.communicate() convert_icon('Icon-configure.png', 16, 16) repl=' ' if repl!='':text=text.replace(treffer,repl) # Now replace warnings for treffer in such.findall(text): warningnote=treffer.lstrip('{').rstrip('}') repl='' warningnotesplit=warningnote.partition('|') warningnote,warningnotetext=warningnotesplit[0],warningnotesplit[2] # print warningnote.lower() if warningnote.lower()=='warning': warningnotetext=warningnotetext.lstrip('1=') repl='%s' %warningnotetext elif warningnote.lower()=='tip': repl='%s' %warningnotetext elif warningnote.lower()=='info' or warningnote.lower()=='note': warningnotetext=warningnotetext.lstrip('1=') repl='%s' %warningnotetext elif warningnote.lower()=='remember': warningnotetext=warningnotetext.rpartition('=')[2] repl='%s' %warningnotetext elif warningnote.lower()=='input': warningnotetext=warningnotetext.lstrip('1=') if '#!/' in warningnotetext or 'syntaxhighlight lang=' in warningnotetext: repl='%s ' %warningnotetext else: repl='%s ' %warningnotetext elif warningnote.lower()=='output': warningnotetext=warningnotetext.lstrip('1=') repl='%s' %warningnotetext elif warningnote.lower()=='todo': repl='%s' %warningnotetext.strip() elif warningnote.lower()=='bug': repl='bug #%s' %(warningnotetext.strip(),warningnotetext.strip()) elif (warningnote.lower()=='#evp:youtube') or (warningnote.lower()=='#ev:youtube'): 
repl='YouTube video' %(warningnotetext.strip()) elif warningnote.lower()=='#ev:vimeo': repl='' %(warningnotetext.strip()) if warningnote.lower()==':kdenlive/templates/contenttodo': repl='' text=text.replace(treffer,repl) if repl!='':text=text.replace(treffer,repl) #textlines = open(inputfile,"rw").readlines() textlines = text.split('\n') level=0 initemizedlist=False initemizedlist2=False initemizedlist3=False intable=False innumberedlist=False innumberedlist2=False innumberedlist3=False inuserinput=False inoutput=False invariablelist=False minequalno=5 #fix non consecutive heading levels prevno,jumpno,deltano=1,0,0 remuster='^={1,5}' for i in range(0,len(textlines)): line=textlines[i] such=re.compile(remuster) if len(such.findall(line))>0: actualno=line.count('=',0,5) minequalno=min(minequalno,actualno) if actualno-prevno>1: deltano=actualno-prevno-1 jumpno=prevno prevno=actualno if actualno<=jumpno: jumpno=0 deltano=0 #print line.rstrip('\n'),actualno,jumpno,deltano,line.replace('='*actualno,'='*(actualno-deltano)).rstrip('\n') if deltano>0: textlines[i]=line.replace('='*actualno,'='*(actualno-deltano)) if minequalno>1: delta=minequalno-1 for i in range(0,len(textlines)): line=textlines[i] such=re.compile(remuster) if len(such.findall(line))>0: actualno=line.count('=',0,5) textlines[i]=line.replace('='*actualno,'='*(actualno-delta)) #print line,textlines[i] for line in textlines: line=line+'\n' #need that cause switch from #textlines = open(inputfile,"rw").readlines() to textlines = text.split('\n') #debugging of lines #print '>>\n%s<<'%line skip=False if '</translate>' in line or '[[Category:' in line: skip=True #strip off: <!--T:1--> remuster='<.*>' line=re.sub(remuster,'',line) if line[0:1]=='=' and line[1]!='=': if invariablelist: invariablelist=False outtext+='%s\n \n \n\n' %listtext closemarkup='' if level>4:closemarkup+='\n' %headinglevels[toplevel+4] if level>3:closemarkup+='\n' %headinglevels[toplevel+3] if level>2:closemarkup+='\n' 
%headinglevels[toplevel+2] if level>1:closemarkup+='\n' %headinglevels[toplevel+1] if level>0:closemarkup+='\n' %headinglevels[toplevel+0] level=1 repl=sectionheader(line,1,'<%s' %headinglevels[toplevel-1+1]) outtext+='%s%s' %(closemarkup,repl) initemizedlist=False initemizedlist2=False initemizedlist3=False innumberedlist=False innumberedlist2=False innumberedlist3=False elif line[0:2]=='==' and line[2]!='=': if invariablelist: invariablelist=False outtext+='%s\n \n \n\n' %listtext closemarkup='' if level>4:closemarkup+='\n' %headinglevels[toplevel+4] if level>3:closemarkup+='\n' %headinglevels[toplevel+3] if level>2:closemarkup+='\n' %headinglevels[toplevel+2] if level>1:closemarkup+='\n' %headinglevels[toplevel+1] level=2 repl=sectionheader(line,2,'<%s' %headinglevels[toplevel-1+2]) outtext+='%s%s' %(closemarkup,repl) initemizedlist=False initemizedlist2=False initemizedlist3=False innumberedlist=False innumberedlist2=False innumberedlist3=False elif line[0:3]=='===' and line[3]!='=': if invariablelist: invariablelist=False outtext+='%s\n \n \n\n' %listtext closemarkup='' if level>4:closemarkup+='\n' %headinglevels[toplevel+4] if level>3:closemarkup+='\n' %headinglevels[toplevel+3] if level>2:closemarkup+='\n' %headinglevels[toplevel+2] level=3 repl=sectionheader(line,3,'<%s' %headinglevels[toplevel-1+3]) outtext+='%s%s' %(closemarkup,repl) initemizedlist=False initemizedlist2=False initemizedlist3=False innumberedlist=False innumberedlist2=False innumberedlist3=False elif line[0:4]=='====' and line[4]!='=': if invariablelist: invariablelist=False outtext+='%s\n \n \n\n' %listtext closemarkup='' if level>4:closemarkup+='\n' %headinglevels[toplevel+4] if level>3:closemarkup+='\n' %headinglevels[toplevel+3] level=4 repl=sectionheader(line,4,'<%s' %headinglevels[toplevel-1+4]) outtext+='%s%s' %(closemarkup,repl) initemizedlist=False initemizedlist2=False initemizedlist3=False innumberedlist=False innumberedlist2=False innumberedlist3=False elif line[0:5]=='=====' 
and line[5]!='=': if invariablelist: invariablelist=False outtext+='%s\n \n \n\n' %listtext closemarkup='' if level>4:closemarkup+='\n' %headinglevels[toplevel+4] level=5 repl=sectionheader(line,5,'<%s' %headinglevels[toplevel-1+5]) outtext+='%s%s' %(closemarkup,repl) initemizedlist=False initemizedlist2=False initemizedlist3=False innumberedlist=False innumberedlist2=False innumberedlist3=False else: #level="para" if line !='\n' and level!=0 and skip==False: if (not invariablelist) and line[0:2]!=':*' and line[0:2]!=':#': # variablelist can contain itemized or ordered list line=line.lstrip(':') #strip off indenting for the moment, how translate that to docbook? if (line[0]=='*' or line[0:2]==':*') and line[0:2]!='**': #itemizedlist initemizedlist2=False if innumberedlist and line[0:2]==':*': listitemtext=line.lstrip(' :*').rstrip() outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] if initemizedlist==False: initemizedlist=True outtext+='\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] #alternativ outtext.replace('','') later!! outtext+='%s\n\n\n\n' %listitemtext else: listitemtext=line.lstrip(' :*').rstrip() if invariablelist==False: if initemizedlist==False: initemizedlist=True outtext+='\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] #alternativ outtext.replace('','') later!! 
outtext+='%s\n\n' %listitemtext else: if initemizedlist==False: initemizedlist=True listtext+='\n\n' else: listtext=listtext.rstrip('\n') listtext=listtext[:-len('')] listtext+='%s\n\n' %listitemtext elif (line[0:2]=='**' or line[0:3]==':**') and line[0:3]!='***': #itemizedlist (sublevel) initemizedlist3=False listitemtext=line.lstrip(' :*').rstrip() if initemizedlist2==False: initemizedlist2=True outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n\n' %listitemtext elif line[0:3]=='***' or line[0:4]==':***': #itemizedlist (subsublevel) listitemtext=line.lstrip(' :*').rstrip() if initemizedlist3==False: initemizedlist3=True outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='\n\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n\n\n\n' %listitemtext elif (line[0]=='#' or line[0:2]==':#') and ((line[0:2]!='#!' 
and line[0:2]!='##') and line[0:2]!='#*'): #orderedlist of first level, not a script innumberedlist2=False innumberedlist3=False initemizedlist=False listitemtext=line.lstrip(' :#').rstrip() if initemizedlist and line[0:2]==':#': listitemtext=line.lstrip(' :#').rstrip() outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] if innumberedlist==False: innumberedlist=True outtext+='\n' else: if outtext.endswith('\n'): outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n\n\n' %listitemtext else: listitemtext=line.lstrip(' :#').rstrip() if innumberedlist==False: innumberedlist=True outtext+='\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n' %listitemtext elif (line[0:2]=='#*' or line[0:3]==':#*') and (line[0:3]!='#**' or line[0:3]!='#*#'): #itemizedlist (sublevel) listitemtext=line.lstrip(' :#*').rstrip() if outtext.endswith('\n'): outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] if initemizedlist==False: initemizedlist=True outtext+='\n' else: if outtext.endswith('\n'): outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n\n\n' %listitemtext elif (line[0:2]=='##' or line[0:3]==':##') and line[0:3]!='###': #orderedlist (sublevel) innumberedlist3=False listitemtext=line.lstrip(' :#').rstrip() if innumberedlist2==False: innumberedlist2=True outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n\n' %listitemtext elif line[0:3]=='###' or line[0:4]==':###': #orderedlist (subsublevel) listitemtext=line.lstrip(' :#').rstrip() if innumberedlist3==False: innumberedlist3=True outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] 
outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='\n\n' else: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] outtext+='%s\n\n\n\n\n' %listitemtext elif line[0]==';' or line[0:2]==':;': # begin variablelist if invariablelist: listtext+='\n \n \n %s\n ' %line[1:].lstrip(';').strip() else: invariablelist=True listtext='\n %s\n ' %line[1:].strip() elif line[0]==':': # varlist item listtext+='\n %s' %line[1:].strip() elif line[0:2]=='{|' or line.strip()[0:7]=='': #begin table tabletext='\n\n \n \n \n' colsnumber=0 rownumber=0 hcolsnumber=0 hascaption=False hasheader=False intable=True inheader=False headertext='' #print line elif line[0:2]=='|+': hascaption=True captiontext=line[2:].strip() tabletext='\n
\n %s\n \n \n \n' elif line[0:1]=='!' or line.strip()[0:4]=='\n \n
': #in table in header inheader=True rownumber=1 if intable: hasheader=True if hascaption: tabletext='\n\n %s\n \n \n \n%s \n \n \n' else: tabletext='\n\n \n \n \n%s \n \n \n' headertext+=' %s\n' %line[1:].lstrip().lstrip('': #in table new row if intable: if rownumber>0: tabletext+=' \n \n \n' inheader=False rownumber+=1 colsnumber=0 elif line[0:2]=='|}' or line.strip()[0:8]=='
').rstrip(' \n') hcolsnumber+=1 elif line[0:2]=='| ' or line.strip()[0:4]=='': #in table in row if intable: if colsnumber==0: tabletext+=' \n %s' %line[1:].lstrip().replace('','').rstrip(' \n') else: tabletext+='\n \n \n %s' %line[1:].lstrip().replace('','').rstrip(' \n') colsnumber+=1 elif line[0:2]=='|-': #in table new row if intable: if inheader==False: tabletext+='\n \n \n' inheader=False rownumber+=1 tabletext+=' \n' colsnumber=0 elif line.strip()[0:4]=='
': #end table #print line if intable: + tabletext=tabletext.replace("% ", "%% ") + tabletext=tabletext.replace("%)", "%%)") if hasheader and hascaption: tabletext=tabletext %(captiontext,hcolsnumber,headertext) elif hasheader: tabletext=tabletext %(hcolsnumber,headertext) else: tabletext=tabletext %colsnumber if hascaption: outtext+="%s\n \n
\n\n" %tabletext else: outtext+="%s\n \n \n \n\n\n" %tabletext intable=False elif (line.find('')>-1 and line.find('')==-1) or (line.find('')>-1 and line.find('')==-1): inuserinput=True outtext+='%s' %line elif (line.find('')==-1 and line.find('')>-1) or (line.find('')==-1 and line.find('')>-1): inuserinput=False outtext+='%s\n' %line elif line.find('')>-1 and line.find('')==-1: inoutput=True outtext+='%s' %line elif line.find('')==-1 and line.find('')>-1: inoutput=False outtext+='%s\n' %line elif intable and line.strip()!='': tabletext+='%s\n\n' %line.rstrip() else: if initemizedlist==True: initemizedlist=False if initemizedlist2==True: initemizedlist2=False if initemizedlist3==True: initemizedlist3=False if innumberedlist==True: innumberedlist=False if innumberedlist2==True: innumberedlist2=False if innumberedlist3==True: innumberedlist3=False if line.strip()!='': if invariablelist: invariablelist=False outtext+='%s\n
\n
\n
\n' %listtext if (inuserinput==False) and (inoutput==False): outtext+='%s\n\n' %line.rstrip() else: outtext+='%s\n' %line.replace('"','"').rstrip() # close nesting sections for i in range(level,toplevel,-1): outtext+='\n' %headinglevels[i-1] #''''' bold+italic -> replaceable remuster="'''''.*?\'''''" such=re.compile(remuster,re.DOTALL) for i in such.findall(outtext): repl=i.replace("'''''","") repl='%s' %repl outtext=outtext.replace(i,repl) # Outdated? outtext=outtext.replace('[[Image:Face-smile.png|16px]]',':)') outtext=outtext.replace('[[Image:Face-smile.png|11px]]','\n\n\n\n\nSmiley\n\n') outtext=outtext.replace('[[Image:Smile_wink.png|11px]]',';)') #'''[[#Vocabulary Practice|Practice]]''' # guilabel-link-guilabel not allowed in docbook # move ''' inside for document internal links remuster="'''\[\[#.*?\]]'''" such=re.compile(remuster,re.DOTALL) for guilinkgui in such.findall(outtext): repl=guilinkgui.replace("'''[[","[[") repl=repl.replace("|","|'''") repl=repl.replace("]]'''","''']]") outtext=outtext.replace(guilinkgui,repl) remuster="''\[\[#.*?\]]''" such=re.compile(remuster,re.DOTALL) for guilinkgui in such.findall(outtext): repl=guilinkgui.replace("''[[","[[") repl=repl.replace("|","|''") repl=repl.replace("]]''","'']]") outtext=outtext.replace(guilinkgui,repl) #strip off all other occurrencies outtext=outtext.replace("'''[[","[[") outtext=outtext.replace("]]'''","]]") outtext=outtext.replace("''[[","[[") outtext=outtext.replace("]]''","]]") #"context menu" #context menu remuster='".*?"' such=re.compile(remuster)#,re.DOTALL) for quot in such.findall(outtext): repl=quot.replace('"','') repl='%s' %repl.strip() outtext=outtext.replace(quot,repl) #&nbsp; ->   make it a valid entity outtext=outtext.replace('&nbsp;',' ') #etc. -> &etc; make an entity outtext=outtext.replace('etc.','&etc;') #i.e. -> &ie; make it an entity outtext=outtext.replace('i.e.','&ie;') #e.g. 
-> ⪚ make it an entity outtext=outtext.replace('e.g.','⪚') #-&gt; -> -> outtext=outtext.replace('-&gt;','->') # in title not allowed, strip it off remuster='.*?' such=re.compile(remuster)#,re.DOTALL) for title in such.findall(outtext): repl=title.replace('','').replace('','') outtext=outtext.replace(title,repl) menuls=['', '', '', '', '', '', '', '', '', ''] menule=['', '', '', '', '', '', '', '', '', ''] #View-&gt;Lessons #ViewLessons textparts = outtext.split('beginguiitem') for i in range(len(textparts)): if i>0: subparts = textparts[i].split('endguiitem') if subparts[0].find('->')<0: textparts[i]='%s%s' %(subparts[0].strip(),subparts[1]) else: mitems = subparts[0].split('->') subparts[0] = '' k = len(mitems) for j in range(k): if k == 2 and j > 0: subparts[0] += '%s%s%s' %(menuls[2],mitems[j].strip(),menule[2]) else: subparts[0] += '%s%s%s' %(menuls[j],mitems[j].strip(),menule[j]) subparts[0] += '' textparts[i] = '%s%s' %(subparts[0],subparts[1]) outtext = "".join(textparts) #Ctrl+Shift+F #&Ctrl;ShiftF keyentities=['Alt', 'Backspace', 'Ctrl', 'Enter', 'Esc', 'Shift', 'Tab'] textparts = outtext.split('beginkeycap') for i in range(len(textparts)): if i>0: subparts = textparts[i].split('endkeycap') if subparts[0].find('+')<0 or subparts[0]=='+': if subparts[0].strip() in keyentities: textparts[i]='&%s;%s' %(subparts[0].strip(),subparts[1]) else: textparts[i]='%s%s' %(subparts[0].strip(),subparts[1]) else: subparts[0]=subparts[0].replace('+ +','+ Plusss') mitems = subparts[0].split('+') subparts[0] = '' k = len(mitems) for j in range(k): if mitems[j].strip() in keyentities: subparts[0] += '%s%s%s' %('&',mitems[j].strip(),';') elif mitems[j].strip()=='Plusss': subparts[0] += '+' else: subparts[0] += '%s%s%s' %('',mitems[j].strip(),'') subparts[0] += '' textparts[i] = '%s%s' %(subparts[0],subparts[1]) outtext = "".join(textparts) #'''File→Properties''' (antique, can be found only in Parley) #FileProperties remuster="'{2,3}.*?→.*?'{2,3}" 
such=re.compile(remuster)#,re.DOTALL) for guimenu in such.findall(outtext): repl=guimenu.strip("'") repl=repl.split('→') repl='%s%s' %(repl[0].strip(),repl[1].strip()) outtext=outtext.replace(guimenu,repl) #'''Start Page''' #Start Page remuster="'''.*?'''" such=re.compile(remuster)#,re.DOTALL) for guilabel in such.findall(outtext): repl=guilabel.replace("'''",'') repl='%s' %repl.strip() outtext=outtext.replace(guilabel,repl) #''Vocabulary Collection'' #Vocabulary Collection remuster="''.*?''" such=re.compile(remuster)#,re.DOTALL) for guilabel in such.findall(outtext): repl=guilabel.replace("''",'') repl='%s' %repl.strip() outtext=outtext.replace(guilabel,repl) #userbase internal link #[[Parley#Vocabulary_Collections|Vocabulary Collections]] #Vocabulary Collections #[[Parley/FileFormats|Parley File Formats on userbase.kde.org] #Parley File Formats on userbase.kde.org #[[Akregator]] #Akregator #document internal link #[[#Vocabulary Editing|an Editor for Vocabulary Collections]] #Editor for Vocabulary Collections ##Vocabulary_Editing also valid in wiki #images #[[File:Parley welcome screen.png|thumb|500px] #"Parley_welcome_screen.png" format="PNG" #[[Image:Parley edit main.png|thumb|500px]] #"Parley_edit_main.png" format="PNG" #[[File:Parley new_document_lang_tenses.png|thumb|500px]] #Parley_new_document_lang_tenses.png" format="PNG" #[[File:Parley articles and gender 0-9-4 003.png|thumb|500px]] #"Parley_articles_and_gender_0-9-4_003.png" format="PNG" screenshot_template_wcaption='\n\n\ \n\ \n\ \n\ \n\ \n\ \n\ %s\n\ \n\ \n\ %s\n\ \n\ \n\ ' screenshot_template='\n\n\ \n\ \n\ \n\ \n\ \n\ \n\ \n\ \n\ \n\ ' alignments=['right', 'center', 'left', 'thumb', ''] #to differentiate captions from alignment remuster='' #remove inline images from captions suchimage=re.compile(remuster,re.DOTALL) remuster='\[\[.*?\]\]' such=re.compile(remuster,re.DOTALL) for linkimage in such.findall(outtext): if ('[[File:' in linkimage) or ('[[Image:' in linkimage): #screenshot 
# Reduce the [[File:...]] / [[Image:...]] link to its bare file name: keep the
# part before the first '|', drop the namespace prefix and the closing
# brackets, and turn spaces into underscores.
repl = linkimage.split('|')[0]
repl = repl.replace('[[File:', '')
repl = repl.replace('[[Image:', '')
repl = repl.replace(' ', '_')
repl = repl.rstrip(']')
# Download image from UserBase
# NOTE(review): every command below is assembled by string formatting and run
# through a shell; file names containing shell metacharacters are not escaped.
# print(...) with a single argument prints exactly what the print statement
# used elsewhere in this Python 2 script would print.
if not nodownload:
    if have_wget:
        download_command = command_template % repl.strip()
        proc = subprocess.Popen(download_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # The first output line of that command is taken as the image URL,
        # with 'thumb/' removed from it.
        image_address = proc.stdout.readline().strip('\n').replace('thumb/', '')
        image_name = image_address.rpartition('/')[2]
        our_image_name = image_name.lower()
        # Strip dots from the name as it breaks PDF generation
        our_image_name = re.sub(r"[\.]", "-", our_image_name.rpartition('.')[0]).replace("%3d", "-") + '.' + our_image_name.rpartition('.')[2]
        print('Downloading %s...' % our_image_name)
        wget_command = wget_template % (our_image_name, image_address)
        sts = os.system(wget_command)
        if ('.jpeg' in our_image_name) or ('.jpg' in our_image_name):
            if have_convert:
                print("Converting %s to PNG..." % our_image_name)
                png_image_name = our_image_name.replace('.jpeg', '.png').replace('.jpg', '.png')
                sts = os.system("convert " + our_image_name + " " + png_image_name)
                sts = os.system("rm -f " + our_image_name)
                our_image_name = png_image_name
            else:
                print("Cannot find convert from ImageMagick in your system. You should convert your JPEGs (%s) to PNG manually." % our_image_name)
        if have_convert:
            print("Converting %s to 96 ppi (good for PDF)..." % our_image_name)
            identify_command = "identify -format \"%%w\" %s" % our_image_name
            proc = subprocess.Popen(identify_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # Default to 0 ("do not resize") when the width cannot be read.
            # Without a default the name is unbound for the first image
            # (NameError) or still holds the previous image's width.
            image_width = 0
            try:
                image_width = int(proc.stdout.readline().strip('\n'))
            except ValueError:
                print("Sorry, cannot identify image width. Result can be too wide.")
            # Remove only a trailing '.png' to build the temporary file name.
            # str.strip('.png') strips any of the characters '.', 'p', 'n', 'g'
            # from BOTH ends, so 'gparted.png' became 'arted' and
            # 'png-x.png' became '-x' (which convert then reads as an option).
            if our_image_name.endswith('.png'):
                file_name = our_image_name[:-len('.png')]
            else:
                file_name = our_image_name
            if image_width > 800:
                print("Image is too wide. It will be resized to 800px.")
                convert_96ppi_command = "convert -units PixelsPerInch %s -resize \"800\" -density 96 %s-new.png && mv -f %s-new.png %s" % (our_image_name, file_name, file_name, our_image_name)
            else:
                convert_96ppi_command = "convert -units PixelsPerInch %s -density 96 %s-new.png && mv -f %s-new.png %s" % (our_image_name, file_name, file_name, our_image_name)
            sts = os.system(convert_96ppi_command)
        else:
            print("Cannot find convert from ImageMagick in your system. Your image (%s) will not be optimized for PDF conversion (96 ppi)." % our_image_name)
        if have_advpng:
            print("Optimizing %s to save disk space..." % our_image_name)
            advpng_command = "advpng -z4 %s > /dev/null 2>&1" % our_image_name
            sts = os.system(advpng_command)
        else:
            print("Cannot find advpng in your system. Your image (%s) will not be optimized to save disk space." % our_image_name)
    else:
        print("Cannot find wget in your system. Please install it correctly to download images.")
#repl=repl.capitalize() #falsch für k3b repl=repl.replace('.jpeg','.png').strip().lower() repl=repl.replace('.jpg','.png') repl=re.sub("[\.=]", "-" , repl.rpartition('.')[0]) + '.png' caption=linkimage.rpartition('|')[2] caption=caption.rstrip(']') caption=caption.strip() if (caption in alignments) or (caption[0:2] == '[[') or ('px' in caption): repl=screenshot_template %repl else: for images in suchimage.findall(caption): caption=caption.replace(images,'the icon') # Remove '(click to enlarge)' from captions (images in docbook cannot be enlarged with clicking) caption=caption.replace(' (click to enlarge)','').replace(' (Click to enlarge)','') repl=screenshot_template_wcaption %(repl,caption,caption) elif '[[Special:' in linkimage or '[[#' in linkimage: #document internal link linkimagesplit=linkimage.split('|') if '#' in linkimagesplit[0]: anchor=linkimagesplit[0].rpartition('#')[2] else: anchor=linkimagesplit[0].rpartition('/')[2] anchor=anchor.strip().replace( ' ','-') anchor=anchor.replace('_','-') anchor=anchor.replace(';','-') # remove brackets from linkend anchor=anchor.replace('.28','') anchor=anchor.replace('.29','') # remove & from linkend anchor=anchor.replace('.2C','') # remove digits from linkend anchor=anchor.replace('1-','one-') anchor=anchor.replace('2-','two-') anchor=anchor.replace('3-','three-') anchor=anchor.replace('4-','four-') anchor=anchor.replace('5-','five-') anchor=anchor.replace('6-','six-') anchor=anchor.replace('7-','seven-') anchor=anchor.replace('8-','eight-') anchor=anchor.replace('9-','nine-') anchor=anchor.replace('0-','zero-') # remove question mark from linkend anchor=anchor.replace('.3F','') anchor=anchor.replace("'",'') anchor=anchor.replace("?",'') anchor=anchor.replace("!",'') anchor=anchor.replace(",",'') anchor=anchor.replace(' ','-') anchor=anchor.replace(':','-') anchor=anchor.replace('.','-') anchor=anchor.replace('','').replace('','') anchor=anchor.replace('"','') anchor=anchor.replace(' ','') 
anchor=anchor.replace('>','') anchor=anchor.replace('&','') anchor=anchor.replace('(','').replace(')','') # Some UserBase pages are wrong named: Section name != Page name (Amarok! Sigh...) altanchor='' for i in range(len(anchor)): if anchor[i] in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' and i!=0: altanchor+='-'+anchor[i] else: altanchor+=anchor[i] anchor=anchor.lower() theanchor='the-'+anchor anchordialog=anchor+'-dialog' anchorfeature=anchor+'-feature' anchormenu=anchor+'-menu' if anchor=='faq': altanchor='frequently-asked-questions' else: altanchor=altanchor.lower() thealtanchor='the-'+altanchor thealtanchorpane='the-'+altanchor+'-pane' kdealtanchor='kde-'+altanchor amarokonaltanchor='amarok-on-'+altanchor kwinrulesanchor=anchor.replace('kwin-rules-','') anchortext=linkimagesplit[1].replace(']]','') if (anchor in sectionids) and not (anchor in genwords): repl='%s' %(anchor.strip(),anchortext.strip()) elif theanchor in sectionids: repl='%s' %(theanchor,anchortext.strip()) elif anchormenu in sectionids: repl='%s' %(anchormenu,anchortext.strip()) elif (altanchor in sectionids) and not (anchor in genwords): repl='%s' %(altanchor,anchortext.strip()) elif thealtanchor in sectionids: repl='%s' %(thealtanchor,anchortext.strip()) elif anchordialog in sectionids: repl='%s' %(anchordialog,anchortext.strip()) elif anchorfeature in sectionids: repl='%s' %(anchorfeature,anchortext.strip()) elif thealtanchorpane in sectionids: repl='%s' %(thealtanchorpane,anchortext.strip()) elif kdealtanchor in sectionids: repl='%s' %(kdealtanchor,anchortext.strip()) elif amarokonaltanchor in sectionids: repl='%s' %(amarokonaltanchor,anchortext.strip()) elif (kwinrulesanchor in sectionids) and not (anchor in genwords): repl='%s' %(kwinrulesanchor,anchortext.strip()) else: #userbase internal link outside this manual linkimagesplit=linkimage.split('|') anchor=linkimagesplit[0].lstrip('[') anchor=anchor.rstrip(']') print anchor if len(linkimagesplit)>1: anchortext=linkimagesplit[1].replace(']]','') else: 
anchortext=anchor repl='%s' %(anchor.strip(),anchortext.strip()) elif '[[http' in linkimage: #it is an external link, process that later pass outtext=outtext.replace(linkimage,repl) #external link #[http://en.wikipedia.org/wiki/Flashcard flash card learning approach] #flash card learning approach #[[https://mail.kde.org/mailman/listinfo/kdepim-users subscribe to kdepim-users]] #subscribe to kdepim-users remuster='\[{1,2}http.*?\]{1,2}' such=re.compile(remuster)#,re.DOTALL) for link in such.findall(outtext): linkwobracket=link.replace('[','').replace(']','') linksplit=linkwobracket.split(' ') anchor = linksplit[0] anchortext='' for i in range(1,len(linksplit)): anchortext+=linksplit[i]+' ' repl='%s' %(anchor,anchortext.rstrip()) outtext=outtext.replace(link,repl) #mailto link #[mailto:rekonq@kde.org mailing list] #mailing list remuster='\[{1,2}mailto.*?\]{1,2}' such=re.compile(remuster)#,re.DOTALL) for link in such.findall(outtext): linkwobracket=link.replace('[','').replace(']','') linksplit=linkwobracket.split(' ') anchor = linksplit[0] anchortext='' for i in range(1,len(linksplit)): anchortext+=linksplit[i]+' ' repl='%s' %(anchor,anchortext.rstrip()) outtext=outtext.replace(link,repl) #IRC link #[irc://freenode/#rekonq #rekonq IRC channel] ##rekonq IRC channel remuster='\[{1,2}irc:.*?\]{1,2}' such=re.compile(remuster)#,re.DOTALL) for link in such.findall(outtext): linkwobracket=link.replace('[','').replace(']','') linksplit=linkwobracket.split(' ') anchor = linksplit[0] anchortext='' for i in range(1,len(linksplit)): anchortext+=linksplit[i]+' ' repl='%s' %(anchor,anchortext.rstrip()) outtext=outtext.replace(link,repl) #fix empty chapter/sections def comment_empty_sections(outtext, sectname): remuster="<%s.*?' 
outtext=outtext.replace("'","'") # remove empty paragraphs outtext=outtext.replace('\n','') # remove empty cuts outtext=outtext.replace('----\n','') # remove _TOC_, they do not work ;) outtext=outtext.replace('__TOC__\n','') # fix for lists in warnings, notes, and tips paddings=['warning', 'note', 'tip'] lists=['itemizedlist', 'orderedlist'] for str1 in paddings: for str2 in lists: endmess='
\n' %(str1,str2) repl='\n\n' %(str2,str1) outtext=outtext.replace(endmess,repl) #remuster='\n' #such=re.compile(remuster,re.DOTALL) #for endmess in such.findall(outtext): # repl=endmess.split('\n' %repl # outtext=outtext.replace(endmess,repl) # Fix for screens in userinput #outtext=outtext.replace('\n','') #outtext=outtext.replace('','') outtext=outtext.replace('','') outtext=outtext.replace('','') outtext=outtext.replace('\n ','') outtext=outtext.replace('','') outtext=outtext.replace('','') # xml2pot does not like link-only paragraphs. Let's trick it with a non-breakable space. outtext=outtext.replace('',' ') outtext=outtext.replace('',' ') outtext=outtext.replace('\n',' \n') outtext=outtext.replace('',' ') outtext=outtext.replace('\n',' \n') # Nothing will be enlarged when you click. Remove these sentences. outtext=outtext.replace(' (click to enlarge)','') outtext=outtext.replace('(Click to enlarge)','') # Replace app name with an entity outtext=outtext.replace("Akregator","&akregator;") outtext=outtext.replace("Amarok","&amarok;") outtext=outtext.replace("Amarok's","&amarok;'s") outtext=outtext.replace("Amor","&amor;") outtext=outtext.replace("Ark","&ark;") outtext=outtext.replace("Calligra","&calligra;") outtext=outtext.replace("Cantor","&cantor;") outtext=outtext.replace("Choqok","&choqok;") outtext=outtext.replace("digiKam","&digikam;") outtext=outtext.replace("Dolphin","&dolphin;") outtext=outtext.replace("Emacs","&Emacs;") outtext=outtext.replace("Emacs's","&Emacs;'s") outtext=outtext.replace("Jovie","&jovie;") outtext=outtext.replace("JuK","&juk;") outtext=outtext.replace("KAddressBook","&kaddressbook;") outtext=outtext.replace("KAlarm","&kalarm;") outtext=outtext.replace("Kamoso","Kamoso") outtext=outtext.replace("KAppTemplate","KAppTemplate") outtext=outtext.replace("KCalc","&kcalc;") outtext=outtext.replace("KCharSelect","&kcharselect;") outtext=outtext.replace("KDE","&kde;") outtext=outtext.replace("Kdenlive","Kdenlive") 
outtext=outtext.replace("KDevelop","KDevelop") outtext=outtext.replace("Kexi","&kexi;") outtext=outtext.replace("Kexi's","&kexi;'s") outtext=outtext.replace("KMail","&kmail;") outtext=outtext.replace("KMix","&kmix;") outtext=outtext.replace("Konqueror","&konqueror;") outtext=outtext.replace("Konsole","&konsole;") outtext=outtext.replace("Kontact","&kontact;") outtext=outtext.replace("KOrganizer","&korganizer;") outtext=outtext.replace("Kopete","&kopete;") outtext=outtext.replace("Kubuntu","&kubuntu;") outtext=outtext.replace("KWin","&kwin;") outtext=outtext.replace("LibreOffice","LibreOffice") outtext=outtext.replace("Mac OS","&MacOS;") outtext=outtext.replace("Microsoft Excel","&Microsoft; Excel") outtext=outtext.replace("OpenOffice.org","OpenOffice.org") outtext=outtext.replace("Parley","&parley;") outtext=outtext.replace("Phonon","&phonon;") outtext=outtext.replace("Pulseaudio","PulseAudio") outtext=outtext.replace("Thunderbird","Thunderbird") outtext=outtext.replace("Thunderbird's","Thunderbird's") outtext=outtext.replace("Ubuntu","&ubuntu;") # Replace UserBase internal links with help links outtext=outtext.replace("http://userbase.kde.org/Special:mylanguage/System Settings/Account Details#Paths","help:/kcontrol/paths") outtext=outtext.replace("http://userbase.kde.org/Special:myLanguage/Dragon_Player","help:/dragonplayer") outtext=outtext.replace("http://userbase.kde.org/Special:myLanguage/System_Settings/Search_Desktop","help:/kcontrol/nepomuk") # fix for Amarok handbook outtext=outtext.replace("the Manual","this handbook") outtext=outtext.replace('UPnP','UPnP') outtext=outtext.replace('Nepomuk Collection','Nepomuk Collection') outtext=outtext.replace('Statistics Synchronization Between Collections and with Last.fm','Statistics Synchronization Between Collections and with Last.fm') # Fix for — outtext=outtext.replace("&mdash;","—") # Replace Linux with &linux; etc. 
outtext=re.sub("(?')] if outtext.strip('\n').rpartition('sect1>')[2]=='': outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] modifiedtext.write(docbookheader+outtext+userbase_content_marker+docbookfooterlitesection) else: modifiedtext.write(docbookheader+outtext+userbase_content_marker+docbookfooterlite) elif 'credits-and-license' in sectionids: outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] if outtext.strip('\n').rpartition('sect1>')[2]=='': outtext=outtext.rstrip('\n') outtext=outtext[:-len('')] modifiedtext.write(docbookheader+outtext+userbase_content_marker+docbookfooterlitesection) else: modifiedtext.write(docbookheader+outtext+userbase_content_marker+docbookfooter) else: outtext+=userbase_content_marker modifiedtext.write(docbookheader+outtext+docbookfooter) modifiedtext.close() print "output written to %s" %filemodified