diff --git a/texla.py b/texla.py index f3a52a7..c4cc71e 100644 --- a/texla.py +++ b/texla.py @@ -1,66 +1,67 @@ from log import * import json import yaml from texla.Parser import Parser from texla.Renderers.MediaWikiRenderer import MediaWikiRenderer import texla.PageTree.Exporter as exporter from texla.Exceptions.TexlaExceptions import * from texla.Reporter import Reporter def execute_texla_mediawiki(config): p = Parser(config) a = open(config['input_path'], 'r').read() try: tree = p.parse(a) except (PreparserError, ParserError) as err: err.print_error() + err.print_complete_tree('debug/crash_tree') exit() f = open(config['output_path'] + '.tree', 'w') json_tree = tree.to_json(0) n_blocks = tree.n_blocks() logging.info('PARSED %i Blocks', n_blocks) f.write(json_tree) logging.info('\033[0;34m############### STARTING RENDERING ###############\033[0m') #creating Reporter reporter = Reporter(p.tree_explorer) #rendering rend = MediaWikiRenderer(config, reporter) rend.start_rendering(tree) o = open(config['output_path'] + '.json', 'w') o.write(json.dumps(rend.tree.get_tree_json(), indent=3)) #print page tree before POST-PROCESSING logging.info('PageTree:\n'+rend.tree.get_tree_debug()) #collpasing logging.info('\033[0;34m############### STARTING POST-PROCESSING ###############\033[0m') tree = rend.tree tree.collapse_tree(config['collapse_content_level'], config['collapse_pages_level']) #printing tree after POST-PROCESSING logging.info('PageTree:\n'+rend.tree.get_tree_debug()) oc = open(config['output_path'] + '-coll.json', 'w') oc.write(json.dumps(rend.tree.get_tree_json(), indent=3)) logging.info('\033[0;34m############### EXPORTING ###############\033[0m') if config['create_index']: tree.create_indexes(config["export_book_page"]) exporter.exportPages(tree.pages, config['output_path'] + '.mw', config['export_format']) if config['export_single_pages']: exporter.export_singlePages(tree.pages, config['output_path'] + '_pages', config['export_format']) if config['export_pages_tree']: exporter.export_pages_tree(tree.pages.values(), config['output_path'] + "_pages") reporter.print_report(console=True) logging.info('Finished') if __name__ == '__main__': #reading JSON configs config = yaml.load(open('configs.yaml','r')) #loading localized keywords config['keywords'] = yaml.load(open('i18n.yaml','r'))[config['lang']] #executing process for alla renderers if config['renderer'] == 'mediawiki': execute_texla_mediawiki(config) diff --git a/texla/Exceptions/TexlaExceptions.py b/texla/Exceptions/TexlaExceptions.py index 27fa77a..4a7ac50 100644 --- a/texla/Exceptions/TexlaExceptions.py +++ b/texla/Exceptions/TexlaExceptions.py @@ -1,57 +1,65 @@ from ..Parser.TreeExplorer import TreeExplorer class TexlaError(Exception): pass class PreparserError(TexlaError): def __init__(self, error_tex="", all_tex=""): self.error_tex = error_tex self.all_tex = all_tex def print_error(self): output = ["#"*50 + "\nCRASH REPORT\n" +"-"*30, "ERROR TEX: " + self.error_tex] return "\n".join(output) class PreparseMacrosError(PreparserError): pass class ParserError(TexlaError): def __init__(self, error_tex, block, message): self.error_tex = error_tex self.block = block self.message = message #creating tree explorer self.tree_explorer = TreeExplorer.create_tree_from_children(block) def print_error(self): output = ["#" * 50 + "\nCRASH REPORT\n" + "-" * 30] output.append("\nMESSAGE: " + self.message) if len(self.error_tex) < 200: output.append("\nERROR TEX: " + self.error_tex) else: output.append("\nERROR TEX: " + self.error_tex[:200]) output.append(self.tree_explorer.print_tree_to_block(self.block)) print("\n".join(output)) + def print_complete_tree(self, path=None): + if path: + with open(path, 'w') as f: + f.write(self.tree_explorer.print_all_tree()) + else: + print(self.tree_explorer.print_all_tree()) + + + class BlockError(ParserError): def __init__(self, block_type, error_tex, block, message): self.block_type = block_type self.error_tex = error_tex self.block = block self.message = message #creating tree explorer self.tree_explorer = TreeExplorer.create_tree_from_children(block) def print_error(self): output = ["#" * 50 + "\nCRASH REPORT\n" + "-" * 30] output.append("\nMESSAGE: " + self.message) if len(self.error_tex) < 200: output.append("\nERROR TEX: " + self.error_tex) else: output.append("\nERROR TEX: " + self.error_tex[:200]) output.append("\nBLOCK TYPE: " + self.block_type) output.append(self.tree_explorer.print_tree_to_block(self.block)) print("\n".join(output)) - diff --git a/texla/Parser/Blocks/Block.py b/texla/Parser/Blocks/Block.py index 3c45e17..a903047 100644 --- a/texla/Parser/Blocks/Block.py +++ b/texla/Parser/Blocks/Block.py @@ -1,133 +1,141 @@ from .Utilities import * """Base Block definition""" class Block: """ Block general attributes: -block_name: the new of the "type" of the block -id: unique id for the block in the tree -parent_block: parent in the tree -attributes: a dictionary for description of the block. All useful parser data go into attributes -ch_blocks: a list of children_blocks -section_level: the position of the block compared to sectioning levels defined in utility.py Derived Block could add more attributes. """ @staticmethod def parse(parser, tex, parent_block, params): """ The method must return a tuple with the created Block and the last used index of tex string.""" pass def __init__(self, block_name, content, parent_block): """ Base constructor for Block. It saves the parent_block and block name and create the new id for the new block. It creates data structures like the attributed dictionary and children nodes list. It always saves a content variable. By default, it sets the section_level of the block to that of the parend_block. """ self.block_name = block_name self.content = content if not parent_block is None: self.parent_block = parent_block self.id = parent_block.id + '-' + utility.get_random_string(3) #Section level: #by default the level is the same of parent block self.section_level = self.parent_block.section_level #depth in the tree self.tree_depth = self.parent_block.tree_depth+1 else: #if this is the root block self.parent_block = None self.id = '@' self.section_level = -1 self.tree_depth = 0 #dictionary for attributes self.attributes = {} #list for childrend blocks self.ch_blocks = [] self.N_chblocks = 0 def add_child_block(self, block): """ IMPORTANT: this function is called by the self.parse fuction. It MUST NOT be called from outside, expecially the parser """ self.ch_blocks.append(block) self.N_chblocks +=1 def add_children_blocks(self, blocks): """ IMPORTANT: this function is called by the self.parse fuction. It MUST NOT be called from outside, expecially the parser """ self.ch_blocks += blocks self.N_chblocks +=len(blocks) def change_parent_block(self, new_parent): """s function changes the parent of the block. It changes parent object, id, and tree_depth. The section level is not changes for consistency. All children are updated. """ self.parent_block = new_parent #rebuiding id self.id = new_parent.id + '-' + utility.get_random_string(3) #the section level is not changed, #but tree_depth is updated self.tree_depth = new_parent. tree_depth + 1 #now childrens are updated for ch in self.ch_blocks: ch.change_parent_block(self) def get_children(self, bl_name): """ This function return a list of children blocks corresponding to the requested type. If there are not children blocks of that type it returns a void list.""" result = [] for bl in self.ch_blocks: if bl.block_name == bl_name: result.append(bl) return result def __str__(self): return ''.format( self.block_name, self.id) def to_json(self, level=0): """ This functions create a json ouput that represents the tree of subblocks of the called block. """ json = '' levelb = level+3 json += (' '*level + '{\n') json += (' '*levelb + '"ID":"'+ self.id+'",\n') json += (' '*levelb + '"block_name":"'+ self.block_name+'",\n') json += (' '*levelb + '"N. ch_blocks":"'+ str(self.N_chblocks)+'",\n') json += (' '*levelb + '"tree_depth":"'+ str(self.tree_depth)+'",\n') for k,v in self.attributes.items(): json += (' '*levelb + '"'+k+ '":"'+str(v)+ '",\n' ) json += (' '*levelb + '"children_blocks":[\n') for b in self.ch_blocks: json+= b.to_json(levelb+3) json += (' '*levelb+'],\n') json += (' '*level + '}\n') return json def n_blocks(self): - """s function returns the + """This function returns the number of all children blocks recursively.""" n = len(self.ch_blocks) for c in self.ch_blocks: n+= c.n_blocks() return n + + def get_content(self): + """ + This function can be overrided by a specific + block in order to provide a personalized representation + of the content of the block for logging/reporting. + """ + return self.content diff --git a/texla/Parser/Blocks/TextBlocks.py b/texla/Parser/Blocks/TextBlocks.py index fd47f1b..1dd22f6 100644 --- a/texla/Parser/Blocks/TextBlocks.py +++ b/texla/Parser/Blocks/TextBlocks.py @@ -1,87 +1,88 @@ import re import logging from .Utilities import * from .Block import Block class TextBlock(Block): @staticmethod def parse_plain_text(parser, tex, parent_block, params): - '''Plain text is seen as and env. It has only to return + '''Plain text is handled directly by the Parser as + an environment. We have only to return the block''' #first of all we can create the new block text_block = TextBlock(tex, parent_block) #the block is returned return text_block def __init__(self, text, parent_block): ''' Constructor for text: -text: string: ''' super().__init__('text', text, parent_block) #attributes self.attributes['text'] = TextBlock.fix_text(text) self.attributes['text_length'] = len(text) @staticmethod def fix_text(text): '''Function that removes useless spaces from text''' text = text.replace('\t', '') text = text.replace(' ', '') return text def __str__(self): return ''.format( self.block_name, self.id, self.attributes['text_length']) class AccentedLetterBlock(Block): @staticmethod def parse_accents(parser, tex, parent_block, params): accent_type = params['cmd'] #we can extract the letter using grammar params, left_tex = CommandParser.parse_options(tex, [('letter', '{', '}')]) #we get the letter, stripper to avoid spaces letter = params['letter'].strip() block = AccentedLetterBlock(letter, accent_type, parent_block) return (block, left_tex) def __init__(self, letter, accent_type, parent_block): super().__init__('accented_letter', letter, parent_block) self.attributes['letter'] = letter self.attributes['accent_type'] = accent_type def __str__(self): return ''.format( self.block_name, self.id, self.attributes['accent_type']) class SpecialCharacterBlock(Block): def parse(parser, tex, parent_block, params): block = SpecialCharacterBlock(params['cmd'], parent_block) return (block, tex) def __init__(self, char, parent_block): super().__init__(char, char, parent_block) self.attributes['character'] = char parser_hooks = { 'text': TextBlock.parse_plain_text, "'": AccentedLetterBlock.parse_accents, "`": AccentedLetterBlock.parse_accents, '"': AccentedLetterBlock.parse_accents, "~": AccentedLetterBlock.parse_accents, "^": AccentedLetterBlock.parse_accents, "=": AccentedLetterBlock.parse_accents, ".": AccentedLetterBlock.parse_accents, '%': SpecialCharacterBlock.parse, '&': SpecialCharacterBlock.parse, '{': SpecialCharacterBlock.parse, '}': SpecialCharacterBlock.parse, '#': SpecialCharacterBlock.parse, '_': SpecialCharacterBlock.parse, '$': SpecialCharacterBlock.parse } diff --git a/texla/Parser/Parser.py b/texla/Parser/Parser.py index b263c74..a2d1afb 100644 --- a/texla/Parser/Parser.py +++ b/texla/Parser/Parser.py @@ -1,457 +1,473 @@ import re import logging from . import PreParser from . import Blocks from .TreeExplorer import TreeExplorer from .Blocks.Utilities import * from .Blocks.DocumentBlock import DocumentBlock from ..Exceptions.TexlaExceptions import * '''Commands that changes directly the subsequent letter''' letters_commands = ("'","`",'"','~','^','=','.') special_characters = ('%','&','$','{','}','#','_',' ', '\n') class Parser: def __init__(self, configs): self.configs = configs self.doc_data = {} self.root_block = None self.tree_explorer = None def parse(self,tex): """Entry point for parsing. The DocumentBlock is created and all the parse chain is started from parse_sections. The function returns the root_block, which contains all the parsed tree blocks.""" #preparsing tex, doc_data = PreParser.preparse(tex, self.configs['input_path']) self.doc_data = doc_data logging.info('\033[0;34m############### STARTING PARSING ###############\033[0m') #Getting content of document r_doc = re.compile(r'\\begin(?P\[.*?\])?{document}'+ r'(?P.*?)\\end{document}', re.DOTALL) m_doc = r_doc.search(tex) #getting content content = m_doc.group("content") logging.debug('Parser @ got content of Document') #creating root block self.root_block = DocumentBlock(self.doc_data['title'],{}) #creating the TreeExplorer self.tree_explorer = TreeExplorer(self.root_block) #beginning of parsing: creation of root block options = {} #for now we don't have options blocks = self.parse_sections(content, -1, self.root_block,options) self.root_block.add_children_blocks(blocks) #updating the tree_explorer self.tree_explorer.update_blocks_register() return self.root_block def parse_sections(self, tex, level, parent_block, options): """ This parser function search for sections splitting inside tex. The level of sections searched is indicated by sec_level option. The function calls the parser_hooks of every section block. When all sections levels are searched the control pass to parse_instructions(). It returns a list of blocks parsed as tuples. """ try: pblocks = [] #check if the level is greater than subparagraph if (level+1) < (len(utility.section_level)-1): #getting level key from utility to create regex level_key = utility.section_level[level+1] sec_re = re.compile(r'\\'+ level_key + r'(?![a-zA-Z])') #the tex is splitted by the section key toks = sec_re.split(tex) #the first token is the tex outside sectioning #the text is STRIPED to avoid null strings outside_sec = toks.pop(0).strip() if outside_sec != '': #this tex has to be parser but with a sectioning #level greater than one pblocks+=self.parse_sections(outside_sec, level+1, parent_block, options) #now the sections found are processed for tok in toks: if tok.startswith('*'): star = True tok = tok[1:].strip() else: star = False tok = tok.strip() #we insert the level in the options sec_params = { 'sec_level' : (level +1), 'level_key' : level_key, 'star' : star} #the tuple with the result is saved pblocks.append(self.call_parser_hook(level_key, 'env', tok, parent_block, sec_params)) logging.debug('Block @ %s%s', "\t"*pblocks[-1].tree_depth, str(pblocks[-1])) else: #if we have searched for all section levels #we continue with instructions #First we STRIP the tex and check if tex is not void new_tex = tex.strip() if len(new_tex) > 0: pblocks += self.parse_instructions(new_tex, parent_block, options) #found block are returned to main cycle return pblocks except ParserError as err: raise except: raise ParserError(tex, parent_block, "Error in parse_sections") def parse_instructions(self, tex, parent_block, options): """This function is the MAIN ENTRY POINT for parsing. It scan the tex from left to right. It searches for \\ or $. When an instruction is found (a pattern starting with \\ or $), the right parser function is called. These functions take care to parse the command, create the block calling parser_hooks, and to return the block and the tex left to parse. Then the remaining tex starts a new cycle in parse_instructions() recursively. It returnes a list of parsed blocks. The categories of instrucions parsed are: -math: starts with $, $$ or \[ \( -environments: (start with \begin) -letters commands: they are special commands listed in letters_commands. They are parsed separately -normal commands: like \cmd{text} """ #printing the current tex for debug #logging.debug('CURRENT-TEX: ' + tex[:40]) #list of blocks parsed pblocks = [] #checking if tex is void if len(tex) == 0: return pblocks #searching for comands \cmd, envs \begin or math symb = {} left_tex = '' slash = tex.find('\\') dollar = tex.find('$') graph = tex.find('{') if slash == -1 and dollar == -1 and graph==-1: #we check if the string is only space or \n if len(tex.strip()): #it's plain text pblocks.append(self.parse_plain_text(tex, parent_block)) return pblocks #searching the first symbol if slash != -1: symb[slash] = 'slash' if dollar != -1: symb[dollar] = 'dollar' if graph != -1: symb[graph] = 'group' #getting the first occurence first_index = sorted(symb)[0] first_symb = symb[first_index] #creating block text with before_tex before_tex = tex[:first_index] #tex to parse tex_tp = tex[first_index:] #creating a plain text block if len(before_tex.strip()): pblocks.append(self.parse_plain_text(before_tex, parent_block)) #what's the first token: slash, dollar, group if first_symb == 'slash': #we check if it's a math command like \[ or \( if tex_tp[1] in ('[','('): block, left_tex = self.parse_math( tex_tp, parent_block, options) #now we check if it's an environment elif tex_tp[1:6] == 'begin': block, left_tex = self.parse_enviroment( tex_tp, parent_block, options) #we check if we have letters commands elif tex_tp[1] in letters_commands: block, left_tex = self.parse_letter_command( tex_tp, parent_block, options) #we check if we have special characters elif tex_tp[1] in special_characters: block, left_tex = self.parse_special_character( tex_tp, parent_block, options) else: #finally we have a normal command block, left_tex = self.parse_command( tex_tp, parent_block, options) #block saved pblocks.append(block) elif first_symb == 'dollar': #we have to parse math block, left_tex = self.parse_math(tex_tp, parent_block,options) pblocks.append(block) elif first_symb == 'group': #we have a group {...} syntax block, left_tex = self.parse_commands_group( tex_tp, parent_block, options) pblocks.append(block) #left_tex is parsed with another cycle pblocks += self.parse_instructions( left_tex, parent_block, options) #all the blocks parsed are returned return pblocks def parse_enviroment(self, tex, parent_block, options): """ This function handles the parsing of environments. It parses the name of the environment and if it's starred. Then EnvironmentParser.get_environment() is used to extract the complete environment, handling nested envs. The content is sent to parser_hook for the specific parsing. The parser_hook decides also if the content of the env must be parsed recursively. A new block is created and returned with the tex remained to parse. """ - #we search for the first enviroment - re_env1 = re.compile(r'\\begin(?: *)\{(?: *)(?P\w*?)'+\ - r'(?P[*]?)(?: *)\}') - match = re_env1.match(tex) - if not match is None: - env = match.group('env') - star = True if match.group('star')!='' else False - env_tot = env + '\*' if star else env - #now we extract the env greedy - s,e,content = EnvironmentParser.get_environment( - tex,env_tot) - #the name of catched env is inserted in params - #with the star param - env_params = {'env':env, 'star':star} - #we can call the parser hooks. - #N.B.: the tex passed to parser hook is the CONTENT STRIPPED - #of the environment, without \begin{} and \end{} part. - #The strip is necessary to parse possible options. - block = self.call_parser_hook(env,'env', - content.strip(), parent_block, env_params) - logging.debug('Block @ %s%s', - "\t"*block.tree_depth, - str(block)) - #we return the block and left_tex - return (block, tex[e:]) - else: - #it's an error - logging.error('PARSER.parse_enviroment @ env NOT FOUND') + try: + #we search for the first enviroment + re_env1 = re.compile(r'\\begin(?: *)\{(?: *)(?P\w*?)'+\ + r'(?P[*]?)(?: *)\}') + match = re_env1.match(tex) + if not match is None: + env = match.group('env') + star = True if match.group('star')!='' else False + env_tot = env + '\*' if star else env + #now we extract the env greedy + s,e,content = EnvironmentParser.get_environment( + tex,env_tot) + #the name of catched env is inserted in params + #with the star param + env_params = {'env':env, 'star':star} + #we can call the parser hooks. + #N.B.: the tex passed to parser hook is the CONTENT STRIPPED + #of the environment, without \begin{} and \end{} part. + #The strip is necessary to parse possible options. + block = self.call_parser_hook(env,'env', + content.strip(), parent_block, env_params) + logging.debug('Block @ %s%s', + "\t"*block.tree_depth, + str(block)) + #we return the block and left_tex + return (block, tex[e:]) + else: + #it's an error + logging.error('PARSER.parse_enviroment @ env NOT FOUND') + except ParserError: + raise + except: + raise ParserError(tex, parent_block, "Error in parse_enviroment") def parse_math(self, tex, parent_block, options): """ This function handles the parsing of math commands: $..$, $$..$$, \[..\], \(..\). The matched math is inserted in "display_math" or "inline_math" block. The function returnes the block and left_tex. """ - #firt we have to check the double dollar - if tex.startswith("$$"): - i = tex.find("$$", 2) - content = tex[2:i] - left_tex = tex[i+2:] - env = "displaymath" - elif tex.startswith("$"): - i = tex.find("$", 1) - content = tex[1:i] - left_tex = tex[i+1:] - env = "inlinemath" - elif tex.startswith("\\["): - i = tex.find("\\]", 2) - content = tex[2:i] - left_tex = tex[i+2:] - env = "displaymath" - elif tex.startswith("\\("): - i = tex.find("\\)", 2) - content = tex[2:i] - left_tex = tex[i+2:] - env = "inlinemath" - params = {'env': env} - block = self.call_parser_hook(env, 'env', - content, parent_block, params) - logging.debug('Block @ %s%s', - "\t"*block.tree_depth, - str(block)) - return (block, left_tex) + try: + #firt we have to check the double dollar + if tex.startswith("$$"): + i = tex.find("$$", 2) + content = tex[2:i] + left_tex = tex[i+2:] + env = "displaymath" + elif tex.startswith("$"): + i = tex.find("$", 1) + content = tex[1:i] + left_tex = tex[i+1:] + env = "inlinemath" + elif tex.startswith("\\["): + i = tex.find("\\]", 2) + content = tex[2:i] + left_tex = tex[i+2:] + env = "displaymath" + elif tex.startswith("\\("): + i = tex.find("\\)", 2) + content = tex[2:i] + left_tex = tex[i+2:] + env = "inlinemath" + params = {'env': env} + block = self.call_parser_hook(env, 'env', + content, parent_block, params) + logging.debug('Block @ %s%s', + "\t"*block.tree_depth, + str(block)) + return (block, left_tex) + except ParserError: + raise + except: + raise ParserError(tex, parent_block, "Error in parse_math") def parse_command(self, tex, parent_block, options): """ This function handles the parsing of normal commands. It catches the command's name and if it's starred. Removed the \cmd part, the tex is passed to the right parser_hook that manages the real parsing of commands options. The parser_hook decides also if the content of the command must be parsed recursively. It returns the block and the left tex that must be parsed by another cycle of parse_instructions() """ - #regex to catch commands - re_cmd = re.compile(r"\\(?:(?P[a-zA-Z]+)"+\ - r"(?P[*]?)|(?P\\))", re.DOTALL) - match = re_cmd.match(tex) - if not match is None: - #managing match. - #checking if the part of the match with the regular - #command is present--> math.group != None!!! - if match.group('cmd') != None: - matched_cmd = match.group('cmd') - star = True if match.group('star')!='' else False - #we insert the matched options in the dict for hooks - params = {'cmd':matched_cmd, 'star':star} - #the text passed to hooks is STRIPPED to remove - #useless spaces. - #N.B the matched part is not sent to hook - tex_to_parse = tex[match.end():].strip() - #the matched command is parsed by the parser_hook - #and the remaining tex is returned as the second element of - #a list. The first element is the parsed Block. - block, left_tex = self.call_parser_hook(matched_cmd, - 'cmd', tex_to_parse, parent_block,params) - logging.debug('Block @ %s%s', - "\t"*block.tree_depth, - str(block)) + try: + #regex to catch commands + re_cmd = re.compile(r"\\(?:(?P[a-zA-Z]+)"+\ + r"(?P[*]?)|(?P\\))", re.DOTALL) + match = re_cmd.match(tex) + if not match is None: + #managing match. + #checking if the part of the match with the regular + #command is present--> math.group != None!!! + if match.group('cmd') != None: + matched_cmd = match.group('cmd') + star = True if match.group('star')!='' else False + #we insert the matched options in the dict for hooks + params = {'cmd':matched_cmd, 'star':star} + #the text passed to hooks is STRIPPED to remove + #useless spaces. + #N.B the matched part is not sent to hook + tex_to_parse = tex[match.end():].strip() + #the matched command is parsed by the parser_hook + #and the remaining tex is returned as the second element of + #a list. The first element is the parsed Block. + block, left_tex = self.call_parser_hook(matched_cmd, + 'cmd', tex_to_parse, parent_block,params) + logging.debug('Block @ %s%s', + "\t"*block.tree_depth, + str(block)) + else: + #we have a \\ command + matched_cmd = '\\' + tex_to_parse = tex[match.end():].strip() + #we insert the matched options in the dict for hooks + params = {'cmd':'\\', 'star':False} + #check if we have \\* + if tex_to_parse.startswith('*'): + params['star'] = True + tex_to_parse = tex_to_parse[1:] + #parser_hook call + block, left_tex = self.call_parser_hook(matched_cmd, + 'cmd', tex_to_parse, parent_block,params) + logging.debug('Block @ %s%s', + "\t"*block.tree_depth, + str(block)) + return (block, left_tex) else: - #we have a \\ command - matched_cmd = '\\' - tex_to_parse = tex[match.end():].strip() - #we insert the matched options in the dict for hooks - params = {'cmd':'\\', 'star':False} - #check if we have \\* - if tex_to_parse.startswith('*'): - params['star'] = True - tex_to_parse = tex_to_parse[1:] - #parser_hook call - block, left_tex = self.call_parser_hook(matched_cmd, - 'cmd', tex_to_parse, parent_block,params) - logging.debug('Block @ %s%s', - "\t"*block.tree_depth, - str(block)) - return (block, left_tex) - else: - #it's an error - logging.error('PARSER.parse_command @ command NOT FOUND: {}'. - format(tex[0:10])) + #it's an error + logging.error('PARSER.parse_command @ command NOT FOUND: {}'. + format(tex[0:10])) + raise ParserError(tex, parent_block, "command NOT FOUND in parse_command") + except ParserError: + raise + except: + raise ParserError(tex, parent_block, "Error in parse_command") def parse_commands_group(self, tex, parent_block, options): """ This function handles the group of commands created with the syntax {...}. It's used for the formatting commands. """ block, left_tex = self.call_parser_hook( 'commands_group', 'env', tex, parent_block, {'env':'commands_group'}) return (block, left_tex) def parse_letter_command(self, tex, parent_block,options): """' This function handles special commands for accented or modified letters. They are special commands because they don't need a {} and they act directly on the next letter. Examples: \'a: accented letter \`a: grave accent \~a \=a \^a other changes on the letter The function parse that commands and call parser_hook as the normal parse_command() function. Althought, the letter influenced by the command is inserted in a {} so that special command could be treated like normal commands with hooks. It returns the block and the left tex to parse. """ #first of all we get the command cmd = tex[1] params = {'cmd':cmd, 'star':False} #then it is a letter command #check if the letter is inside a {} r = re.compile(r'\\' + cmd + r'\s*\{(.*?)\}') match = r.match(tex) if match != None: tex_to_parse = tex[2:].strip() block, left_tex = self.call_parser_hook(cmd, 'cmd', tex_to_parse, parent_block,params) else: #we have to catch the next letter re_letter = re.compile(r'\\' + cmd + r'\s*(?P\w)') letter_m = re_letter.match(tex) letter = letter_m.group('letter') #adding parenthesis to standardize parser_hook tex_to_parse = '{'+letter + '}'+ \ tex[letter_m.end():] block, left_tex = self.call_parser_hook(cmd, 'cmd', tex_to_parse, parent_block, params) logging.debug('Block @ %s%s', "\t"*block.tree_depth, str(block)) return (block, left_tex) def parse_special_character(self, tex, parent_block,options): """ This function parse special commands like \% or \&. The mechanism is the same ad special_commands, but options are not searched. """ cmd = tex[1] if cmd in [' ','\n']: #we change the name of the command cmd = "mandatory_space" params = {'cmd':cmd, 'star':False} block, left_tex = self.call_parser_hook(cmd, 'cmd', tex[2:], parent_block, params) logging.debug('Block @ %s%s', "\t"*block.tree_depth, str(block)) return (block, left_tex) def parse_plain_text(self, tex, parent_block): """ This function create the block for plain text. It doesn't return any left tex. """ params = {'env':'text'} block = self.call_parser_hook('text','env', tex, parent_block,params) logging.debug('Block @ %s%s', "\t"*block.tree_depth, str(block)) return block def call_parser_hook(self, hook, type, tex, parent_block, params={}): """ This function checks if the required parser_hook is avaiable, if not it calls th default hook. The function ask for type of call (env or cmd) to be able of asking the right default hooks, in case the hook in not avaiable. Params is a dictionary of options for the parser. It usually contains che env or cmd parsed and if it's starred. It returns directly the output of parser_hook. """ if hook in Blocks.parser_hooks: return Blocks.parser_hooks[hook](self, tex, parent_block, params) else: #default hook is called if type == 'cmd': return Blocks.parser_hooks['default_cmd']( self, tex, parent_block, params) elif type == 'env': return Blocks.parser_hooks['default_env']( self, tex, parent_block, params) diff --git a/texla/Parser/TreeExplorer.py b/texla/Parser/TreeExplorer.py index 49f054a..a8dceeb 100644 --- a/texla/Parser/TreeExplorer.py +++ b/texla/Parser/TreeExplorer.py @@ -1,124 +1,124 @@ import logging class TreeExplorer: """ The TreeExplorer class is an utility to navigate and extract information from the tree of parsed blocks. For example it is useful to extract the tree of the parents of a block for debugging reasons. It is useful also in rendering to localize blocks inside the document. """ def __init__(self, root_block): """ The constructor needs a root_block to begin the tree""" self.root_block = root_block self.blocks = {'@': root_block} self.block_names = {"default":[]} #registering blocks by id self.register_blocks(root_block.ch_blocks) self.register_block_names() @staticmethod def create_tree_from_children(block): #first of all we need the root_block current = block while True: if current.parent_block is None: root_block = current break current = current.parent_block #now we can return a new TreeExplorer #constructed from the found root. return TreeExplorer(root_block) def register_blocks(self, blocks): """This methods reads all the blocks tree from the root_block and created a dictionary with id:block""" for block in blocks: self.blocks[block.id] = block if block.N_chblocks > 0: self.register_blocks(block.ch_blocks) def register_block_names(self): """This function registers the block_names, creating a dictionary with blocks groups by type""" for bl in self.blocks.values(): if not bl in self.block_names: self.block_names[bl.block_name] = [] if not bl in self.block_names[bl.block_name]: self.block_names[bl.block_name].append(bl) def update_blocks_register(self): """This methods update the blocks' ids register recalling register_blocks with the root_block""" self.register_blocks(self.root_block.ch_blocks) self.register_block_names() def get_parents_list(self, block): """This method returns the list of the parent blocks of the requested block """ if isinstance(block, str): block = self.blocks[block] parents = [] current = block while True: if current == self.root_block: break parents.append(current.parent_block) current = current.parent_block parents.reverse() return parents def get_parents_list_ids(self, block): parents = self.get_parents_list(block) return [x.id for x in parents] def get_block(self, blockid): return self.blocks.get(blockid) def print_tree(self, block, filter_list=None): """This methods prints a beautified tree starting from block parameter and his children. If filter_list is present only the block with the id in the list - are printed. It returns a list of output strings""" + are printed. It returns a single string. """ output = [] if filter_list is None or block.id in filter_list: lstr = ". "* (block.tree_depth+1) output.append(lstr+ ". "+ " "+"_"*40 ) output.append(lstr+ "#"+"---"+ ">|ID : {}".format(block.id)) output.append(lstr+ ". "+ " |block_name : {}". format(block.block_name)) output.append(lstr+ ". "+ " |attributes: ") for at,attr in block.attributes.items(): output.append(lstr+ ". " + " | - "+ "{} : {}". format(at, attr)) output.append(lstr+ ". "+ " |content : {}". - format(block.content)) + format(block.get_content())) output.append(lstr+ ". ."+"\u203E"*40+"\n") output = "\n".join(output) #iterating on the block children for bl in block.ch_blocks: output += self.print_tree(bl, filter_list) return output def print_tree_to_blocks(self, blocks): """This methods print the tree of parents of the list of blocks passed as parameter. First of all it gets all the parents ids and then prints the tree using the list as filter.""" fl = [] for bl in blocks: fl+= self.get_parents_list_ids(bl) if isinstance(bl, str): fl.append(bl) else: fl.append(bl.id) return self.print_tree(self.root_block, filter_list=fl) def print_tree_to_block(self, block): return self.print_tree_to_blocks([block]) def print_all_tree(self): return self.print_tree(self.root_block)