diff --git a/README.mdown b/README.mdown index 9e57507..18c1998 100644 --- a/README.mdown +++ b/README.mdown @@ -1,63 +1,70 @@ texla ===== ![](doc/texla-logo.png) A minimal and easily extensible LaTeX parser. It's **minimal** because it only splits tex without doing anything strange to the sources. It breaks down LaTeX into sections, environments, math, commands and plain text, creating a simple tree of Block objects. It's **easily extensible** because to support a new command or environment the only necessary code is a Python class that defines a new Block. Moreover, options and arguments of LaTeX commands and environments can be parsed with a simple and easy **API**. Further documentation can be found at: https://meta.wikitolearn.org/Texla +Run Texla +========= +To run Texla you'll need Python 3 and PyYAML installed. + + python texla.py + python texla.py --debug + Texla Configuration =================== The execution of texla is controlled by the **configs.yaml** file. There are a few parameters to set: * __renderer__ : The output format of the conversion. __mediawiki__ is the only available one for now * __input_path__ : the path of the main tex file to convert. Texla is able to convert complex documents with many included subfiles: in "input_path" you have to put the main tex file that includes all the subfiles. * __output_path__ : the path for the output files. Write it without the extension because it will be the base filename for all output files. * __doc_title__: The title of the document. Texla doesn't read the title written inside tex. :__doc_title__ is used as a base namespace for pages * __base_path__: texla exports pages in a hierarchical way, organizing pages with unique urls. __base_path__ is used as a root for the urls of the pages. It can be empty. * __collapse_content_level__ : The sectioning of a latex file is a tree. Every part of the tex doc has a level. The level of the root page, which contains the index of the document, is -1. 
The first level of sectioning in the document has level 0. Texla converts the sections into pages and the page gets the level of the section. The content of the pages with level greater than __collapse_content_level__ is inserted in the text of the parent page as a paragraph. * __collapse_pages_level__: If a page has a level greater than __collapse_pages_level__ and is not collapsed, it is moved to the level given by __collapse_pages_level__ going up in the page tree. * __create_index__: if True an index is created in the root page. * __export_format__ : for now _text_ is the only one available * __export_single_pages__: if True a file for every page is created and saved in a directory called __"output_path"_pages__ * __export_pages_tree__: if True the pages are exported in a tree of directories (root in __"output_path"_pages__ ) corresponding to the actual sectioning. * __export_book_page__: If True the page necessary to Project:Books is created. * __print_preparsed_tex__: if True a debug file called _preparsed.tex_ is saved with preparsed tex. * __lang__: localization for keywords. The available languages are those inside the __i18n.yaml__ file. Contributions appreciated :) * __plugins__: [...] List of the enabled plugins. The order of this list is the order of execution: be careful. Plugins ======= The available plugins are: * __MathCheck__: it fixes the math to be correct for WikiToLearn rendering * __math_check_online__: at the end of the rendering it calls the WikiToLearn math renderer to check if there are errors in the math. * __space_check__: it removes the single spaces after a newline. 
diff --git a/i18n.yaml b/i18n.yaml index e8e9e24..b6c610a 100644 --- a/i18n.yaml +++ b/i18n.yaml @@ -1,15 +1,18 @@ it: chapters: Capitoli subpages: Sottopagine intro: Introduzione book_category: Categoria:Libri book_url: Project:Libri category: Categoria + center: centro en: chapters: Chapters subpages: Subpages intro: Introduction book_category: Category:Books book_url: Project:Books category: Category + center: center + diff --git a/log.py b/log.py index c3e8760..1657c03 100644 --- a/log.py +++ b/log.py @@ -1,26 +1,30 @@ import logging import sys if len(sys.argv) > 1: - loglevel = sys.argv[1].split('=')[1] + loglevelstr = sys.argv[1][2:] + if loglevelstr in ["info", "debug", "error", "warn"]: + loglevel = loglevelstr + else: + loglevel = "info" else: loglevel = "info" if len(sys.argv)>2: output = sys.argv[2].split('=')[1] in ('True','true','Y','y','yes','Yes') else: output = False numeric_level = getattr(logging, loglevel.upper(), None) if not isinstance(numeric_level, int): raise ValueError('Invalid log level: %s' % loglevel) if output: logging.basicConfig(filename='tree.log', format='%(levelname)s:%(message)s', level=numeric_level) else: logging.basicConfig( format='%(levelname)s:%(message)s', level=numeric_level) logging.info('texla started') diff --git a/texla.py b/texla.py index 6565f3e..f3a52a7 100644 --- a/texla.py +++ b/texla.py @@ -1,65 +1,66 @@ from log import * import json import yaml from texla.Parser import Parser from texla.Renderers.MediaWikiRenderer import MediaWikiRenderer import texla.PageTree.Exporter as exporter from texla.Exceptions.TexlaExceptions import * +from texla.Reporter import Reporter def execute_texla_mediawiki(config): p = Parser(config) a = open(config['input_path'], 'r').read() try: tree = p.parse(a) except (PreparserError, ParserError) as err: err.print_error() exit() f = open(config['output_path'] + '.tree', 'w') json_tree = tree.to_json(0) n_blocks = tree.n_blocks() logging.info('PARSED %i Blocks', n_blocks) f.write(json_tree) 
logging.info('\033[0;34m############### STARTING RENDERING ###############\033[0m') + #creating Reporter + reporter = Reporter(p.tree_explorer) #rendering - rend = MediaWikiRenderer(config) + rend = MediaWikiRenderer(config, reporter) rend.start_rendering(tree) o = open(config['output_path'] + '.json', 'w') o.write(json.dumps(rend.tree.get_tree_json(), indent=3)) - p = open(config['output_path'] + '.debug', 'w') - p.write(json.dumps(rend.used_tags, indent=2)) #print page tree before POST-PROCESSING logging.info('PageTree:\n'+rend.tree.get_tree_debug()) #collpasing logging.info('\033[0;34m############### STARTING POST-PROCESSING ###############\033[0m') tree = rend.tree tree.collapse_tree(config['collapse_content_level'], config['collapse_pages_level']) #printing tree after POST-PROCESSING logging.info('PageTree:\n'+rend.tree.get_tree_debug()) oc = open(config['output_path'] + '-coll.json', 'w') oc.write(json.dumps(rend.tree.get_tree_json(), indent=3)) - logging.info('######## STARTING EXPORTING ########') + logging.info('\033[0;34m############### EXPORTING ###############\033[0m') if config['create_index']: tree.create_indexes(config["export_book_page"]) exporter.exportPages(tree.pages, config['output_path'] + '.mw', config['export_format']) if config['export_single_pages']: exporter.export_singlePages(tree.pages, config['output_path'] + '_pages', config['export_format']) if config['export_pages_tree']: exporter.export_pages_tree(tree.pages.values(), config['output_path'] + "_pages") - + reporter.print_report(console=True) logging.info('Finished') if __name__ == '__main__': #reading JSON configs config = yaml.load(open('configs.yaml','r')) #loading localized keywords config['keywords'] = yaml.load(open('i18n.yaml','r'))[config['lang']] #executing process for alla renderers if config['renderer'] == 'mediawiki': execute_texla_mediawiki(config) diff --git a/texla/PageTree/Babel.py b/texla/PageTree/Babel.py index dd2d6cf..de3c558 100644 --- a/texla/PageTree/Babel.py 
+++ b/texla/PageTree/Babel.py @@ -1,81 +1,80 @@ import logging class Babel: #Label babel def __init__(self): self.refs = {} self.anchors = {} def add_label(self, label, anchor): ''' This method adds a label and its anchor to the babel. The label is unique but can be overwritten. ''' if label in self.anchors: logging.warning("Babel @ label: {} already present".format(label)) #saving the anchor that has to be unique self.anchors[label] = anchor - logging.info("Babel @ Adding label: \"{}\" to anchor: {}". + logging.debug("Babel @ Adding label: \"{}\" to anchor: {}". format(label, anchor)) def add_reference(self, label, ref): ''' This method adds a reference to the label. A reference is a page or in general an object with .text properties. The babel will fix the reference of the registered objects. ''' #we don't check if label exist because #the process is asynchronous" if label not in self.refs: self.refs[label] = [] self.refs[label].append(ref) - logging.info("Babel @ Adding ref: {}, to label: \"{}\"". + logging.debug("Babel @ Adding ref: {}, to label: \"{}\"". format(ref, label)) def move_anchor(self, oldanc, newanc): '''This function replace the references to oldanc with newanc, both as anchor and ref. It is used mainly when a page is moved''' new_anchors = {} for label, anc in self.anchors.items(): if anc == oldanc: new_anchors[label] = newanc self.anchors.update(new_anchors) new_refs = {} for label, ref in self.refs.items(): if ref == oldanc: new_refs[label] = newanc self.refs.update(new_refs) def fix_references(self): ''' This method will fix the reference in the objects saved under self.refs. The text {{ref:label}} in the objects' .text properties will be replaces by a url made of [url|title]. The url and the title MUST be properties of the anchor saved. 
''' #iterating over anchor to fix only the right labels #and ignoring the missing ones for label in self.anchors: obj = self.anchors[label] title = obj.title url = obj.url if url is None and title is None: continue elif url is None and title is not None: replace_string = title elif url is not None and title is None: replace_string = "[["+ url + "]]" else: replace_string = "[[{}|{}]]".format(url,title) #checking if the babel has refs if label not in self.refs: continue #iterating over all refs for ref in self.refs[label]: - logging.info("Babel @ Fixing ref to label: \"{}\", in page: {}". - format(label,ref.title)) - logging.debug("From page: {}, to page: {}".format(ref, obj)) + logging.debug("Babel @ Fixing ref to label: \"{}\", from page {} to page: {}". + format(label,ref, obj)) ref.text = ref.text.replace("{{ref@"+ label +"}}", replace_string) diff --git a/texla/PageTree/PageTree.py b/texla/PageTree/PageTree.py index 96102b6..1152620 100644 --- a/texla/PageTree/PageTree.py +++ b/texla/PageTree/PageTree.py @@ -1,323 +1,326 @@ from .Page import Page from .Babel import Babel from .TheoremsManager import * import re, os, json class PageTree(): def __init__(self, configs): self.configs = configs self.doc_title = configs['doc_title'] self.keywords = configs['keywords'] self.output_path = configs['output_path'] #pages Data {id: page} self.pages = {} #id : titles self.titles = {} #label manager self.babel = Babel() #theorems manager self.theorems_manager = TheoremsManager(self.pages) #urls (they are created after collapsing). 
#it's a dictionary id:url self.urls = {} #ROOT PAGE ro = Page(self.doc_title, 'root', -1, self.keywords) self.root_id = ro.id self.root_page = ro self.pages[self.root_id] = ro self.titles[self.root_id] = ro.title #indexes self.pageid_stack = [ro.id] self.current_page_id = self.root_id #the anchor is the object to be referentiated by labels self.current_anchor = ro def createPage(self, title, page_type): """This method creates a new page and enters in his enviroment setting current variables""" title = self.get_normalized_title(title) #finding level level = len(self.pageid_stack) - 1 #create new page p = Page(title, page_type, level, self.keywords) #add page to pages index self.pages[p.id] = p self.titles[p.id] = p.title #adding the page as subpage of the current page self.pages[self.current_page_id].addSubpage(p) #updates current self.pageid_stack.append(p.id) self.current_page_id = p.id self.current_anchor = p def exitPage(self): """Return to the parent page enviroment""" self.current_page_id = self.pageid_stack[-2] self.pageid_stack.pop() self.current_anchor = self.pages[self.current_page_id] def addText(self, text): self.pages[self.current_page_id].addText(text) def addLabel(self, label): """adding label to the babel with the current page as the anchor""" self.babel.add_label(label, self.current_anchor) def addReference(self, label): """adding the current_anchor as a reference for the requesting label""" self.babel.add_reference(label, self.current_anchor) def addTheorem(self, id, th_type): """Adding a theorem also as anchor""" th = Theorem(id,self.current_anchor, th_type) self.theorems_manager.addTheorem(th) #setting current anchor on the theorem self.current_anchor = th def exitTheorem(self): """Removing the anchor from the theorem and setting it to the last used page""" self.current_anchor = self.pages[self.current_page_id] @staticmethod def get_normalized_title(title): """Function that removes bad symbols from title""" title = title.replace('$', '') title = 
title.replace('{','') title = title.replace('}','') title = title.replace('\\mathcal','') title = title.replace('\\mathbf','') title = title.replace('\\mathbb','') title = title.replace('\\ensuremath','') - title = title.replace(';', ' ') title = title.replace('&', 'e') title = title.replace('\\', '') title = title.replace('/', '_') title = title.replace('>', 'gt') title = title.replace('<', 'lt') + title = title.replace(':',' ') + title = title.replace('.',' ') + title = title.replace(',',' ') + title = title.replace(';',' ') return title def get_tree_json(self): """This function return the json tree""" return self.root_page.get_json_dictionary(self.pages) def get_tree_debug(self): """This function prints the tree for debug""" s = [] for p in self.root_page.get_subpages(): s.append(p.get_str()) return('\n'.join(s)) def after_render(self): """This function does some fixes after rendering""" for page in self.pages.values(): page.after_render() def change_title(self, page_id, title): self.pages[page_id].title = title def remove_page_from_tree(self, page, parent=None): """This function remove a page from the tree, but doesn't delete it. The page remains in the self.pages dictionary but not in the subpages of the pages in the tree. 
If a parent page is passed the research for the removal starts from that page with performance improvements""" if parent: parent.removeSubpage(page) else: self.root_page.removeSubpage(page) def move_page_references(self, oldpage, newpage): """This function fixes the reference in TheoremsManager and Babel when a page is moved""" #we need to fix anchors in Babel self.babel.move_anchor(oldpage, newpage) #we need also to fix the theorems page self.theorems_manager.move_theorems_page(oldpage, newpage) def collapse_tree(self, content_level, max_page_level): """This function contains all the tree collapsing procedures in the order: 1) Mark the pages for the content collapsing without actually move the text 2) Fix the tree order with collapsing of page level (N.B.: it needs the collasped status of pages) 3) Fix the urls now that the level if fixed. 4) Create the pagenumber of every page, after the movement in the tree. 5) The theorems are fixed adding the right numbering. 6) Fix references to labels: the Babel will change pages content so this has to be done after the url fixing but before the actual text collapsing. 7) Finally collapse the pages text to the right content level""" self.collapse_content_level(content_level) self.collapse_page_level(max_page_level) self.collapse_urls() self.create_pagenumbers() self.theorems_manager.fix_theorems() #ask the babel to fix the refs to labels in all the pages self.babel.fix_references() #collapse the text in the right pages self.collapse_content_level_text(content_level) def collapse_content_level(self, max_level): """This function marks pages with level higher than choosen level to be collapsed. It DOESN'T move the text.""" for p in self.pages.values(): if p.level > max_level: p.collapsed = True def collapse_content_level_text(self, max_level): """This function collapses the content of the pages at the choosen level. 
The content of the pages with level higher than max_level is moved up to the tree to the page with the max_level, creating titles in the page text. The pages touched are marked as collapsed=True.""" for p in self.pages.values(): if p.level == max_level: p.collapseSubpagesText() def collapse_page_level(self, max_level): """This function fixes the level of the pages in the index according to a max_level. Pages with a level higher than the max_level are moved up in the tree till the max_level. The order related to parent pages is mantained. The PageTree is rewrited, hierarchy and levels are fixed. Moreover the level=0 is a special level and it's content is moved to an intro page, because level=0 pages must contain the index of their subpages. """ #PAGES LEVEL = 0 #If they contain text we have to create a new page #called introduction (localized) for p in [x for x in self.pages.values() if x.level==0]: if len(p.text)>0: #creating new page for text inside text page. p_intro = Page(self.keywords['intro'], 'section',1, self.keywords) p_intro.text = p.text #saving the intro page self.pages[p_intro.id] = p_intro self.titles[p_intro.id] = p_intro.title p.addSubpage_top(p_intro) #erasing text from section page p.text = '' #fixing page references self.move_page_references(p, p_intro) #Now we move pages according to the max_level. #pages not collapsed and with higher level then #the max_level are moved as subpages of the #nearest max_level page. for p in [x for x in self.pages.values() if x.level==max_level]: parent_page = p.parent #list of subpages to move at the right level subpages_to_add = [] #now we are cycling on the pages with level>max_level for sp in p.get_subpages(): if not sp.collapsed: #removing page from the tree acting #directly on the parent page sp.parent.removeSubpage(sp) #saving the page for the movement subpages_to_add.append(sp) #adding the list of moved subpages to the parent_page #so getting the right level. 
parent_page.addSubpages(subpages_to_add, p) ###NB: remember that the subpages level #is AUTOMATICALLY refreshed for all pages added. def collapse_urls(self): """This function creates the urls of the pages, checking is they are collapsed or not. If they are collapsed the url is parent_page#title. Then the references are resolved to urls throught labes""" self.root_page.collapseURL(self.configs['base_path']) def create_pagenumbers(self): """Every page will have a pagenumber like 1.2.1""" self.root_page.pagenumber = "0" i = 1 for pages in self.root_page.subpages: pages.create_pagenumbers(None, i ) i += 1 def create_indexes(self, export_book_page=False): """This function create sections index and book total index""" self.create_sections_index() self.create_book_index(export_book_page=False) def create_sections_index(self): """This function create the index for the sections (level=0) pages""" for page in self.pages.values(): if page.level == 0: index = [] for p in page.get_subpages(): if not p.collapsed: if len(p.text) >0: index.append('*'*p.level+ \ '[[' + p.url + '|' + p.title + ']]') else: index.append('*'*p.level+ p.title ) #adding section category index.append("[[Category:CourseLevelTwo]]") page.text = '\n'.join(index) def create_book_index(self, export_book_page=False): """This function create the book total index and the book export page index""" base_page = self.root_page #book export: link book_url = self.doc_title.replace(' ','_') #creating root index index = ["{{CourseRoot|"] if export_book_page: book_export_index = ['{{libro_salvato | setting-papersize = a4\ | setting-toc = auto | setting-columns = 1}}'] #book export: setting title book_export_index.append('==' + self.doc_title + '==') for page in self.root_page.subpages: if page.level == 0: index.append('{{CourseLevelTwo|'+page.title +'}}') if export_book_page: #book export index for chapters book_export_index.append(';' + page.title) #creating index for book for p in page.get_subpages(): if not 
p.collapsed: if len(p.text) > 0: book_export_index.append( ':[[' + p.url + '|' + p.title + ']]') #closing section index.append('\n{{ForceBreak}}\n') #adding course categories index.append("}}\n") index.append("[["+ self.configs["keywords"]["category"] +":Structure]]") index.append("[[Category:CourseRoot]]") base_page.text += '\n'.join(index) #creating book export page if export_book_page: #adding category to book page book_export_index.append("[["+self.configs["keywords"]["book_category"]+ "|"+self.doc_title +"]]") book_template = self.configs["keywords"]["book_template"] book_title = book_template + '_' + book_url book_export_page = Page(book_title, 'root', -1,None) book_export_page.url = self.configs['base_path']+ \ book_template + '/' + self.doc_title #inserting index text book_export_page.addText(u'\n'.join(book_export_index)) #the export book page is inserted in the pages dict and index self.pages[book_template + '/' + self.doc_title] = book_export_page diff --git a/texla/PageTree/TheoremsManager.py b/texla/PageTree/TheoremsManager.py index 2a5421d..cb92bf5 100644 --- a/texla/PageTree/TheoremsManager.py +++ b/texla/PageTree/TheoremsManager.py @@ -1,96 +1,96 @@ import logging class TheoremsManager: def __init__(self, pages_dict): #refernce to the pages dictionary self.pages = pages_dict self.pages_ths = {} def addTheorem(self, theorem): page = theorem.page - logging.info("TheoremsManager @ adding Theorem: {}".format(theorem)) + logging.debug("TheoremsManager @ adding Theorem: {}".format(theorem)) if page not in self.pages_ths: self.pages_ths[page] = [] self.pages_ths[page].append(theorem) def move_theorems_page(self, oldpage, newpage): """This function moves a theorem to a different page to maintain the right anchor in case of moved page.""" if oldpage in self.pages_ths: self.pages_ths[newpage] = self.pages_ths[oldpage] self.pages_ths.pop(oldpage) def fix_theorems(self): """This function fixes the theorems calculating their number and substituing it the 
placeholder in the text. Moreover it fixes the data needed by the label manager.""" for chapter in [x for x in self.pages.values() if x.level == 0]: chapter_number = chapter.pagenumber th_numbering = {} pages_to_check = self.get_subpages_ordered(chapter) for pag in pages_to_check: for th in self.pages_ths[pag]: if th.th_type in th_numbering: number = th_numbering[th.th_type] +1 else: number = 1 th_numbering[th.th_type] = 1 th_numbering[th.th_type] = number th.fixNumber(chapter_number+"."+str(number)) th.fixUrl() def get_subpages_ordered(self, page): pages = [] for subp in page.subpages: if subp in self.pages_ths: pages.append(subp) pages += self.get_subpages_ordered(subp) return pages class Theorem: def __init__(self, id, page, th_type): self.id = id self.page = page #creating a text value. If the Theorem is used #as reference the text of the parent page is used. self.text = page.text self.th_type = th_type self.number = 0 self.title = None self.url = None def fixNumber(self, number): """This method fix the number of the theorem inside its page text replacing the string {{thnum:id}}. The number is also appended to the title""" self.page.text = self.page.text.replace( "{{thnum@"+ self.id + "}}", str(number)) #creating title for label management. Only the number as latex. self.title = str(number) self.number = number - logging.info("Theorem @ Fixing number {} of theorem: {}". + logging.debug("Theorem @ Fixing number {} of theorem: {}". format(number, self)) def fixUrl(self): """The theorem url is setted to the page url. 
N.B.: to be called after pages' urls fixing""" #getting not collapsed url current_page = self.page while True: if current_page.collapsed: current_page = current_page.parent else: break self.url = current_page.url + "#" + self.th_type + self.title.replace(".", "_") #replacing anchor in the text self.page.text = self.page.text.replace( "{{thanchor@"+ self.id + "}}", self.th_type + self.title.replace(".","_")) logging.debug("Theorem @ Fixing URL of theorem {}".format(self)) def __str__(self): return "Theorem. ID: {}. Type: {}, Page: {}, Number: {}".format( self.id, self.th_type, self.page.title, self.number ) diff --git a/texla/Parser/Blocks/AbstractBlock.py b/texla/Parser/Blocks/AbstractBlock.py index bf93bae..e2710de 100644 --- a/texla/Parser/Blocks/AbstractBlock.py +++ b/texla/Parser/Blocks/AbstractBlock.py @@ -1,24 +1,23 @@ import logging from .Utilities import * from .Block import Block class AbstractBlock(Block): '''This class handles the abstract environment''' @staticmethod def parse(parser, tex, parent_block, params): #we first create the Block block = AbstractBlock( tex, parent_block) - logging.debug('AbstractBlock.parse_env @') #now we parse the content children_blocks = parser.parse_instructions(tex, block, {}) #now we can add the children nodes block.add_children_blocks(children_blocks) return block def __init__(self, tex, parent_block): super().__init__('abstract', tex, parent_block) parser_hooks = { 'abstract' : AbstractBlock.parse } diff --git a/texla/Parser/Blocks/AlignmentBlocks.py b/texla/Parser/Blocks/AlignmentBlocks.py index 3b6ddce..6dfb84d 100644 --- a/texla/Parser/Blocks/AlignmentBlocks.py +++ b/texla/Parser/Blocks/AlignmentBlocks.py @@ -1,54 +1,51 @@ import logging from .Utilities import * from .Block import Block class AlignmentBlock(Block): '''This class handles the flushright, flushleft and center environments''' @staticmethod def parse_env(parser, tex, parent_block, params): #we first create the Block block = AlignmentBlock(params['env'], 
tex, parent_block) - logging.debug('AlignmentBlock.parse_env @ type: %s', params['env']) #now we parse the content children_blocks = parser.parse_instructions(tex, block, {}) #now we can add the children nodes block.add_children_blocks(children_blocks) return block @staticmethod def parse_command(parser, tex, parent_block, params): block = AlignmentBlock(params['cmd'], '' , parent_block) - logging.debug('AlignmentBlock.parse_cmd @ type: %s', params['cmd']) return (block,tex) @staticmethod def parse_command_content(parser, tex, parent_block, params): options, left_text = CommandParser.parse_options( tex, [('content','{','}')]) block = AlignmentBlock(params['cmd'], options['content'], parent_block) children_blocks = parser.parse_instructions( options['content'], block,{}) block.add_children_blocks(children_blocks) - logging.debug('AlignmentBlock.parse_cmd @ type: %s', params['cmd']) return (block, left_text) def __init__(self, align_type, tex, parent_block): super().__init__(align_type, tex, parent_block) self.attributes['align_type'] = align_type def __str__(self): return ''.format( self.block_name, self.id, self.attributes['align_type']) parser_hooks = { 'flushleft' : AlignmentBlock.parse_env, 'flushright' : AlignmentBlock.parse_env, 'center' : AlignmentBlock.parse_env, 'centering' : AlignmentBlock.parse_command, 'centerline' : AlignmentBlock.parse_command_content } diff --git a/texla/Parser/Blocks/Block.py b/texla/Parser/Blocks/Block.py index 10ea448..206d8b1 100644 --- a/texla/Parser/Blocks/Block.py +++ b/texla/Parser/Blocks/Block.py @@ -1,124 +1,134 @@ from .Utilities import * -'''Base Block definition''' +"""Base Block definition""" class Block: - ''' + """ Block general attributes: -block_name: the new of the "type" of the block -id: unique id for the block in the tree -parent_block: parent in the tree -attributes: a dictionary for description of the block. 
All useful parser data go into attributes -ch_blocks: a list of children_blocks -section_level: the position of the block compared to sectioning levels defined in utility.py Derived Block could add more attributes. - ''' + """ @staticmethod def parse(parser, tex, parent_block, params): - ''' + """ The method must return a tuple with the created - Block and the last used index of tex string.''' + Block and the last used index of tex string.""" pass def __init__(self, block_name, content, parent_block): - ''' + """ Base constructor for Block. It saves the parent_block and block name and create the new id for the new block. It creates data structures like the attributed dictionary and children nodes list. It always saves a content variable. By default, it sets the section_level of the block to that of the parend_block. - ''' + """ self.block_name = block_name self.content = content if not parent_block is None: self.parent_block = parent_block self.id = parent_block.id + '-' + utility.get_random_string(3) #Section level: #by default the level is the same of parent block self.section_level = self.parent_block.section_level #depth in the tree self.tree_depth = self.parent_block.tree_depth+1 else: #if this is the root block self.parent_block = None self.id = '@' self.section_level = -1 self.tree_depth = 0 #dictionary for attributes self.attributes = {'N_chblocks' : 0} #list for childrend blocks self.ch_blocks = [] self.N_chblocks = 0 def add_child_block(self, block): - ''' + """ IMPORTANT: this function is called by the self.parse fuction. It MUST NOT be called from outside, expecially the parser - ''' + """ self.ch_blocks.append(block) self.N_chblocks +=1 self.attributes['N_chblocks']+=1 def add_children_blocks(self, blocks): - ''' + """ IMPORTANT: this function is called by the self.parse fuction. 
It MUST NOT be called from outside, especially the parser - ''' + """ self.ch_blocks += blocks self.N_chblocks +=len(blocks) self.attributes['N_chblocks']+=len(blocks) def change_parent_block(self, new_parent): - '''This function changes the parent of the + """This function changes the parent of the block. It changes parent object, id, and tree_depth. The section level is not changed for consistency. All children are updated. - ''' + """ self.parent_block = new_parent #rebuiding id self.id = new_parent.id + '-' + utility.get_random_string(3) #the section level is not changed, #but tree_depth is updated self.tree_depth = new_parent. tree_depth + 1 #now childrens are updated for ch in self.ch_blocks: ch.change_parent_block(self) + def get_children(self, bl_name): + """ This function returns a list of children blocks + corresponding to the requested type. If there are no + children blocks of that type it returns an empty list.""" + result = [] + for bl in self.ch_blocks: + if bl.block_name == bl_name: + result.append(bl) + return result + def __str__(self): return ''.format( self.block_name, self.id) def to_json(self, level=0): - ''' + """ This function creates a json output that represents the tree of subblocks of the called block. 
- """ json = '' levelb = level+3 json += (' '*level + '{\n') json += (' '*levelb + '"ID":"'+ self.id+'",\n') json += (' '*levelb + '"block_name":"'+ self.block_name+'",\n') json += (' '*levelb + '"tree_depth":"'+ str(self.tree_depth)+'",\n') for k,v in self.attributes.items(): json += (' '*levelb + '"'+k+ '":"'+str(v)+ '",\n' ) json += (' '*levelb + '"children_blocks":[\n') for b in self.ch_blocks: json+= b.to_json(levelb+3) json += (' '*levelb+'],\n') json += (' '*level + '}\n') return json def n_blocks(self): - '''This function returns the - number of all children blocks recursively.''' + """This function returns the + number of all children blocks recursively.""" n = len(self.ch_blocks) for c in self.ch_blocks: n+= c.n_blocks() return n diff --git a/texla/Parser/Blocks/BreakBlocks.py b/texla/Parser/Blocks/BreakBlocks.py index b820cfa..8fb8226 100644 --- a/texla/Parser/Blocks/BreakBlocks.py +++ b/texla/Parser/Blocks/BreakBlocks.py @@ -1,74 +1,71 @@ import logging from .Utilities import * from .Block import Block class BreakBlock(Block): '''This class gives you the possibility to break/not break a line/page''' @staticmethod def parse(parser, tex, parent_block, params): options, left_text = CommandParser.parse_options( tex, [('priority','[',']')]) if options['priority']==None: priority = 0 else: priority = int(options['priority']) block = BreakBlock(params['cmd'], priority ,tex, parent_block) - logging.debug('BreakBlock.parse') return (block, left_text) def __init__(self, break_type, priority, content, parent_block): super().__init__(break_type, content, parent_block) self.attributes['priority'] = priority class NewlineBlock(Block): def parse_newline(parser, tex, parent_block, params): - logging.debug('NewlineBlock.parse @ ') block = NewlineBlock(params['star'], parent_block) left_tex = CommandParser.parse_options(tex,[])[1] return (block, left_tex) def __init__(self, star, parent_block): super().__init__('newline','\n', parent_block) self.attributes['text'] = 
'\n' self.attributes['star'] = star class NewPageBlock(Block): @staticmethod def parse_newpage(parser, tex, parent_block, params): block = NewPageBlock(params['star'], parent_block) - logging.debug('NewPageBlock.parse_newpage @ cmd: %s',params['cmd']) return (block,tex) def __init__(self, star, parent_block): super().__init__('newpage','', parent_block) self.attributes['star']=star class ParBlock(Block): ''' This block only represents a \n\n in the tex''' @staticmethod def parse(parser, tex, parent_block, params): return (ParBlock(parent_block), tex) def __init__(self, parent_block): super().__init__('par','',parent_block) parser_hooks = { '\\': NewlineBlock.parse_newline, 'newline': NewlineBlock.parse_newline, 'newpage' : NewPageBlock.parse_newpage, 'linebreak' : BreakBlock.parse, 'pagebreak' : BreakBlock.parse, 'nolinebreak' : BreakBlock.parse, 'nopagebreak' : BreakBlock.parse, 'par' : ParBlock.parse } diff --git a/texla/Parser/Blocks/ClearBlocks.py b/texla/Parser/Blocks/ClearBlocks.py index 70422eb..15d8169 100644 --- a/texla/Parser/Blocks/ClearBlocks.py +++ b/texla/Parser/Blocks/ClearBlocks.py @@ -1,23 +1,22 @@ import logging from .Utilities import * from .Block import Block class ClearBlock(Block): '''Block that ends the current page and causes all figures and tables that have so far appeared in the input to be printed''' @staticmethod def parse(parser, tex, parent_block, params): block = ClearBlock(params['cmd'], parent_block) - logging.debug('ClearBlock.parse') return (block, tex) def __init__(self, clear_type, parent_block): super().__init__(clear_type, '', parent_block) parser_hooks = { 'clearpage' : ClearBlock.parse, 'cleardoublepage' : ClearBlock.parse - } \ No newline at end of file + } diff --git a/texla/Parser/Blocks/CommandsGroupBlock.py b/texla/Parser/Blocks/CommandsGroupBlock.py index 5f74abb..a6314b8 100644 --- a/texla/Parser/Blocks/CommandsGroupBlock.py +++ b/texla/Parser/Blocks/CommandsGroupBlock.py @@ -1,41 +1,40 @@ import logging from 
.Utilities import * from .Block import Block from .FormattingBlocks import * class CommandsGroupBlock(Block): '''This block represents the syntax {...}. It is used to group formatting commands. These commands are parsed normally in FormattingBlocks.py and then catched by this block analyzing the ch_blocks. Formatting commands are saved inside formatting list as FormattingGroupBlock objects, ready for rendering. ''' @staticmethod def parse(parser, tex, parent_block, params): - logging.debug('FormattingText.parse @ tex: %s', tex[:30] ) options, left_tex = CommandParser.parse_options(tex, [('content','{','}')]) content = options['content'] block = CommandsGroupBlock(content, parent_block) ch_blocks = parser.parse_instructions( content, block, {}) #searching Formatting Group block for ch in ch_blocks: if isinstance(ch, FormattingGroupBlock): #adding the formatting to the list of formatting block.formatting.append(ch) else: block.add_child_block(ch) return (block, left_tex) def __init__(self, content, parent_block): super().__init__('commands_group',content,parent_block) self.formatting = [] parser_hooks = { 'commands_group' : CommandsGroupBlock.parse, } diff --git a/texla/Parser/Blocks/DefaultBlock.py b/texla/Parser/Blocks/DefaultBlock.py index d3070df..0bdc4c8 100644 --- a/texla/Parser/Blocks/DefaultBlock.py +++ b/texla/Parser/Blocks/DefaultBlock.py @@ -1,60 +1,49 @@ '''Default Block''' import logging from .Utilities import * from .Block import Block class DefaultBlock(Block): ''' This Block is used when the parser doesn't find a proper parser_hook to call for a matched env or command''' @staticmethod def parse_env(parser ,tex, parent_block, params): #getting the name of env if 'env' in params: env_name = params['env'] else: env_name = 'no_env' if 'star' in params: env_name = env_name + '*' if params['star'] else env_name #default block is created - logging.debug('DefaultBlock.parse_env @ %s:',tex[:5]+'...') block = DefaultBlock(tex, env_name, parent_block) #We cannot 
look inside tex, we don't know #what to parser. #we return the block return block @staticmethod def parse_cmd(parser ,tex, parent_block, params): cmd = params['cmd'] cmd = cmd + '*' if params['star'] else cmd #the options has to be matched from the tex match = CommandParser.get_command_options(tex) #match is (options string, left tex ptex = '\\'+cmd+match[0] - logging.debug('DefaultBlock.parse_cmd @ %s:',ptex) #default block is created block = DefaultBlock(ptex, cmd, parent_block) #we return the block and the left tex to parse return (block, match[1]) def __init__(self, tex, block_name, parent_block): - ''' - Constructor for sections: - -title: main title - -index_title: title for table of content - -numbered: True/False - -level: sections level - -parent_block - ''' - #base constructor for Block. It created the id - #nd basic data structures - super().__init__('default-'+block_name, tex, parent_block) + super().__init__('default', tex, parent_block) #the tex is added also as attribute + self.type = block_name self.attributes['content'] = tex parser_hooks = { 'default_env' : DefaultBlock.parse_env, 'default_cmd' : DefaultBlock.parse_cmd, } diff --git a/texla/Parser/Blocks/FigureBlocks.py b/texla/Parser/Blocks/FigureBlocks.py index 59e90a8..d552ed3 100644 --- a/texla/Parser/Blocks/FigureBlocks.py +++ b/texla/Parser/Blocks/FigureBlocks.py @@ -1,62 +1,80 @@ import logging from .Utilities import * from .Block import Block class FigureBlock(Block): '''Permission to place the float: h here at the very place in the text where it occurred. This is useful mainly for small floats. t at the top of a page b at the bottom of a page p on a special page containing only floats. ! without considering most of the internal parametersa, which could otherwhise stop this float from being placed. 
''' @staticmethod def parse_env(parser, tex, parent_block, params): options, left_tex = CommandParser.parse_options(tex, [('placement_specifier','[',']')]) - ps = 'tbp'; + ps = 'tbp' if options['placement_specifier']: - ps = options['placement_specifier']; + ps = options['placement_specifier'] block = FigureBlock(ps, left_tex, parent_block) #now we parse the content children_blocks = parser.parse_instructions(left_tex, block, {}) #now we can add the children nodes block.add_children_blocks(children_blocks) return block def __init__(self, placement_specifier, tex, parent_block): super().__init__('figure', tex, parent_block) self.attributes['placement_specifier'] = placement_specifier class IncludeGraphicsBlock(Block): @staticmethod def parse(parser, tex, parent_block, params): options, left_tex = CommandParser.parse_options(tex, [('img_info','[',']'), ('img_name','{','}')]) ar_img_info = {} if options['img_info']: str_splits = options['img_info'].split( ',', options['img_info'].count(',')) for str_split in str_splits: - spl = str_split.split('=', 1); + spl = str_split.split('=', 1) ar_img_info[spl[0].strip(' ')] = spl[1].strip(' ') - logging.info('FigureBlock.parse_env @ ar_img_info: %s', ar_img_info) block = IncludeGraphicsBlock(options['img_name'], ar_img_info, left_tex, parent_block) return (block, left_tex) def __init__(self, img_name, ar_img_info, tex, parent_block): super().__init__('includegraphics', tex, parent_block) self.attributes['img_name'] = img_name self.attributes['img_options'] = ar_img_info + + +class CaptionBlock(Block): + + @staticmethod + def parse(parser, tex, parent_block, params): + options, left_tex = CommandParser.parse_options(tex, + [('caption','{','}')]) + caption = options['caption'] + block = CaptionBlock(caption, parent_block) + return (block, left_tex) + + def __init__(self, caption, parent_block): + super().__init__('caption', caption, parent_block) + self.attributes["caption"] = caption + + + parser_hooks = { 'figure' : 
FigureBlock.parse_env, - 'includegraphics' : IncludeGraphicsBlock.parse + 'includegraphics' : IncludeGraphicsBlock.parse, + 'caption' : CaptionBlock.parse } diff --git a/texla/Parser/Blocks/FormattingBlocks.py b/texla/Parser/Blocks/FormattingBlocks.py index ce7766f..a47cd5a 100644 --- a/texla/Parser/Blocks/FormattingBlocks.py +++ b/texla/Parser/Blocks/FormattingBlocks.py @@ -1,96 +1,92 @@ import logging from .Utilities import * from .Block import Block class FormattingTextBlock(Block): @staticmethod def parse(parser, tex, parent_block, params): - logging.debug('FormattingTextBlock.parse @ tex: %s', tex[:30] ) options, left_tex = CommandParser.parse_options(tex, [('text','{','}')]) text = options['text'] block = FormattingTextBlock(params['cmd'], text, parent_block) ch_blocks = parser.parse_instructions( text, block, {}) block.add_children_blocks(ch_blocks) return (block, left_tex) def __init__(self, format_type, text, parent_block): - logging.debug('format type: %s', format_type ) super().__init__(format_type,text,parent_block) self.attributes['text'] = text self.attributes['text_length'] = len(text) self.attributes['format_type'] = format_type class FormattingGroupBlock(Block): '''This type of block is created for formatting commands used inside a {...} construct''' @staticmethod def parse(parser, tex, parent_block, params): - logging.debug('FormattingGroupBlock.parse @ tex: %s', tex[:30] ) block = FormattingGroupBlock(params['cmd'], parent_block) return (block, tex) def __init__(self, format_type, parent_block): - logging.debug('format type: %s', format_type ) super().__init__(format_type,'',parent_block) self.attributes['format_type'] = format_type parser_hooks = { #fonts 'underline' : FormattingTextBlock.parse, 'uline' : FormattingTextBlock.parse, 'uppercase' : FormattingTextBlock.parse, 'textrm' : FormattingTextBlock.parse, 'texttt' : FormattingTextBlock.parse, 'textmd' : FormattingTextBlock.parse, 'textup' : FormattingTextBlock.parse, 'textsl' : 
FormattingTextBlock.parse, 'emph' : FormattingTextBlock.parse, 'textsf' : FormattingTextBlock.parse, 'textbf' : FormattingTextBlock.parse, 'textit' : FormattingTextBlock.parse, 'textsc' : FormattingTextBlock.parse, 'textlf' : FormattingTextBlock.parse, 'textnormal' : FormattingTextBlock.parse, 'textsuperscript' : FormattingTextBlock.parse, 'textsubscript' : FormattingTextBlock.parse, #command in groups #shapes 'normalfont' : FormattingGroupBlock.parse, 'em' : FormattingGroupBlock.parse, 'rmfamily' : FormattingGroupBlock.parse, 'rm': FormattingGroupBlock.parse, 'sffamily' : FormattingGroupBlock.parse, 'sf': FormattingGroupBlock.parse, 'ttfamily' : FormattingGroupBlock.parse, 'tt': FormattingGroupBlock.parse, 'upshape' : FormattingGroupBlock.parse, 'up': FormattingGroupBlock.parse, 'itshape' : FormattingGroupBlock.parse, 'it': FormattingGroupBlock.parse, 'slshape' : FormattingGroupBlock.parse, 'sl': FormattingGroupBlock.parse, 'scshape' : FormattingGroupBlock.parse, 'sc': FormattingGroupBlock.parse, 'bfseries' : FormattingGroupBlock.parse, 'bf': FormattingGroupBlock.parse, 'mdseries' : FormattingGroupBlock.parse, 'md': FormattingGroupBlock.parse, 'lfseries' : FormattingGroupBlock.parse, 'lf': FormattingGroupBlock.parse, #sizes 'tiny' : FormattingGroupBlock.parse, 'scriptsize' : FormattingGroupBlock.parse, 'footnotesize' : FormattingGroupBlock.parse, 'small' : FormattingGroupBlock.parse, 'normalsize' : FormattingGroupBlock.parse, 'large' : FormattingGroupBlock.parse, 'Large' : FormattingGroupBlock.parse, 'LARGE' : FormattingGroupBlock.parse, 'huge' : FormattingGroupBlock.parse, 'Huge' : FormattingGroupBlock.parse, } diff --git a/texla/Parser/Blocks/ListBlocks.py b/texla/Parser/Blocks/ListBlocks.py index fe39a07..d245ccc 100644 --- a/texla/Parser/Blocks/ListBlocks.py +++ b/texla/Parser/Blocks/ListBlocks.py @@ -1,93 +1,91 @@ import logging from .Utilities import * from .Block import Block class ListBlock(Block): ''' We use one Block type for all listings. 
Itemize, Enumerate, Description are specified in list_type attributes. ''' @staticmethod def parse(parser, tex, parent_block, params): '''We parse the content of the env. Then we analyze the blocks and find which are items and not. The hierarchy of blocks is constructed after the parsing of the content. It's the only way to let the parser handle nested environments. Then, all the blocks are reappended under items blocks and added as children nodes. ''' list_type = params['env'] block = ListBlock(list_type, tex, parent_block) - logging.debug('ListBlock.parse @' ) #parsing children blocks ch_blocks = parser.parse_instructions( tex, parent_block,{}) #now we search for item blocks item_blocks = [] for i,bl in enumerate(ch_blocks): if isinstance(bl,ItemBlock): item_blocks.append(bl) #all block until we reach another #item is added to the item block j = i while True: if j+1 < len(ch_blocks): bll = ch_blocks[j+1] if isinstance(bll,ItemBlock): break #changin parent bll.change_parent_block(bl) #adding block to #item children bl.add_child_block(bll) j+=1 else: break #adding items blocks to children block.add_children_blocks(item_blocks) return block def __init__(self, list_type, tex, parent_block): super().__init__(list_type,tex,parent_block) #all information is in children blocks self.attributes['list_type'] = list_type class ItemBlock(Block): '''This is only a place holder for a item. The itemize environment will add it his content. 
It's impossibile to extract it before''' @staticmethod def parse (parser, tex, parent_block, params): #we must search for the param \item [word] options, left_tex = CommandParser.parse_options( tex, [('word','[',']')]) word = options['word'] if word==None: word = '' block = ItemBlock(word, parent_block) - logging.debug('ItemBlock.parse @ word: %s',str(word)) #if there's a column in the left text is removed left_tex = left_tex.strip() if left_tex[0] == ':': left_tex = left_tex[1:] return (block, left_tex.strip()) def __init__(self, word, parent_block): super().__init__('item',word,parent_block) #the word is the \item[word] part self.attributes['word'] = word parser_hooks = { 'itemize': ListBlock.parse, 'enumerate': ListBlock.parse, 'description': ListBlock.parse, 'item': ItemBlock.parse, } diff --git a/texla/Parser/Blocks/MathBlocks.py b/texla/Parser/Blocks/MathBlocks.py index aacabb7..85e47a7 100644 --- a/texla/Parser/Blocks/MathBlocks.py +++ b/texla/Parser/Blocks/MathBlocks.py @@ -1,85 +1,83 @@ import logging import re from .Utilities import * from .ReferenceBlocks import LabelBlock from .Block import Block class MathBlock(Block): @staticmethod def parse_math_env(parser, tex, parent_block, params): ''' This parse hook it's used for $$, $, \[ \( and general math environments''' env = params['env'] star = params.get('star',False) #getting labels and tex without labels tex, labels = MathBlock.parse_labels(tex) #the content of the math is stripped block = MathBlock(env, star, tex.strip(), parent_block) - logging.debug('MathBlock.parse_math_env @ env: %s', env) #creating and adding labels blocks for l in labels: lblock = LabelBlock(l, block) - logging.info('BLOCK @ %s%s', + logging.debug('BLOCK @ %s%s', "\t"*lblock.tree_depth, str(lblock)) block.labels.append(lblock) return block @staticmethod def parse_ensure_math(parser, tex, parent_block, params): ''' The \ensuremath{} is a math command, not env''' options, left_tex = CommandParser.parse_options(tex, 
[('math','{','}')]) text = options['math'] block = MathBlock('ensuremath', False, text, parent_block) - logging.debug('MathBlock.parse_ensure_math') return (block, left_tex) @staticmethod def parse_labels(tex): ''' The function get labels from math. Multiple labels in math are allowed. It creates a list of Label mathced and removes them from the tex. It returns the modified tex and list of labels. ''' lre = re.compile(r'\\label\s*\{(?P