data = {}


def preparse(tex, input_path):
    '''Entry point for the pre-parsing of a tex source.

    The steps are applied in a fixed order: document integrity check,
    comment removal, expansion of included files and subfiles, a second
    comment removal (included files may carry comments), macro expansion,
    theorem normalisation, paragraph marking, verb normalisation and
    header extraction. The pre-parsed tex is also written to
    debug/preparsed.tex for inspection.

    tex is the source text, input_path the path of the main tex file
    (used to locate included files).
    Returns the tuple (preparsed_tex, header_data).
    On PreparserError the error is printed and the process exits with
    status 1.
    '''
    # function-scope import: only the error path needs it
    import sys

    try:
        logging.info('\033[0;34m############### STARTING PRE-PARSING ###############\033[0m')
        tex = check_doc_integrity(tex)
        logging.info("PreParser @ Removing comments")
        tex = remove_comments(tex)
        logging.info("PreParser @ Preparsing include and subfiles")
        tex = preparse_include(tex, input_path)
        tex = preparse_subfiles(tex, input_path)
        logging.info("PreParser @ Removing comments...again")
        tex = remove_comments(tex)
        logging.info("PreParser @ Parsing macros")
        tex = parse_macros(tex)
        logging.info("PreParser @ Preparsing Theorems")
        tex = preparse_theorems(tex)
        logging.info("PreParser @ Preparsing par (\\n\\n)")
        tex = preparse_par(tex)
        logging.info("PreParser @ Preparsing verb")
        tex = preparse_verb(tex)
        logging.info("PreParser @ Preparsing header info (title, author, date)")
        data = preparse_header(tex)
        #saving preparsed tex; the debug folder may not exist yet
        log_preparsed_file_path = path.relpath('debug/preparsed.tex')
        os.makedirs(path.dirname(log_preparsed_file_path), exist_ok=True)
        with open(log_preparsed_file_path, 'w') as o:
            o.write(tex)
        return tex, data
    except PreparserError as err:
        err.print_error()
        #sys.exit instead of the interactive-only exit() helper, and a
        #non-zero status because this is a failure path
        sys.exit(1)


def check_doc_integrity(tex):
    r'''Make sure the source has a document environment.

    If \begin{document} is missing, the whole text is wrapped in
    \begin{document} ... \end{document}. Returns the (possibly wrapped) tex.
    '''
    if "\\begin{document}" not in tex:
        tex = "\\begin{document}" + tex + "\\end{document}"
    return tex


def parse_macros(tex):
    r'''Expand the user macros defined in the tex.

    Every \newcommand, \renewcommand and \providecommand (starred or not)
    is parsed into a macro object and its definition is removed from the
    text. Then every occurrence of each macro is replaced with the tex
    produced by the macro. The expansion is repeated until a full pass
    finds no occurrence, so that macros using other macros are expanded too.

    Returns the tex with definitions removed and macros expanded.
    '''
    #regex for newcommand
    new_re = re.compile(r'\\(re)?newcommand[*]?|\\providecommand[*]?')
    macros = {}
    log = {}
    tex_to_parse = tex[:]
    for match in new_re.finditer(tex):
        #first we get the options
        opt_tex = CommandParser.get_command_options(tex[match.end():])
        macro = MacroParser.Macro.parse_macro(opt_tex[0])
        macros[macro.name] = macro
        log[macro.name] = 0
        #removing the definition from the text to parse
        tex_to_parse = tex_to_parse.replace(
            tex[match.start():match.end() + opt_tex[2]], '')
    #we reiterate the process until no macros are found.
    #This is useful for macros using other macros.
    while True:
        macros_found = 0
        for m in macros:
            logging.debug("preparsing MACRO: %s", m)
            #the macro name starts with a backslash: we drop it and
            #escape the rest so that special characters are matched
            #literally (re.escape is safe for any character).
            cmd_re = re.compile(r'\\' + re.escape(m[1:]) + r'(?![a-zA-Z])')
            #(start, end) of every occurrence and its replacement tex
            spans = []
            replaces = []
            for cmd_ma in cmd_re.finditer(tex_to_parse):
                #skipping a macro found inside the content
                #of the previously matched macro
                if spans and cmd_ma.start() < spans[-1][1]:
                    continue
                log[m] += 1
                macros_found += 1
                #we get command complete tex. cmd_tex contains also the
                #index of the start of the tex after the macro.
                cmd_tex = CommandParser.get_command_options(
                    tex_to_parse[cmd_ma.end():])
                parenthesis = CommandParser.get_parenthesis(cmd_tex[0])
                if parenthesis[0][0] == '[':
                    param_default = parenthesis[0][1]
                    parenthesis.pop(0)
                else:
                    param_default = None
                #NOTE(review): the last item is skipped, presumably it is
                #the text left after the arguments; confirm in CommandParser
                params = [parenthesis[i][1]
                          for i in range(len(parenthesis) - 1)]
                #asking the tex to the macro
                replace_tex = macros[m].get_tex(params, param_default)
                #adding a space for safety is necessary
                if cmd_tex[1][0] not in ["\\", " ", '_', '^', '[', '(', '{', "'"]:
                    replace_tex = replace_tex + " "
                replaces.append(replace_tex)
                spans.append((cmd_ma.start(), cmd_ma.end() + cmd_tex[2]))
            if not spans:
                continue
            #now that we have all the occurrences we can stitch the text:
            #untouched piece, replacement, untouched piece, ...
            pieces = []
            cursor = 0
            for (start, end), replacement in zip(spans, replaces):
                pieces.append(tex_to_parse[cursor:start])
                pieces.append(replacement)
                cursor = end
            pieces.append(tex_to_parse[cursor:])
            #the next macro works on the expanded text
            tex_to_parse = ''.join(pieces)
        #a pass without occurrences ends the process
        if macros_found == 0:
            break
    #logging
    for m in log:
        logging.debug('PREPARSER @ macro: %s, %s occurrences', m, log[m])
    return tex_to_parse


def remove_comments(tex):
    r'''Remove the comments from the tex.

    A comment goes from a % that is not escaped (\%) to the end of the
    line; the newline itself is kept. Returns the tex without comments.
    '''
    #NOTE(review): the pattern was corrupted in the reviewed source and has
    #been rebuilt from the docstring; confirm against the repository.
    com_re = re.compile(r'(?<!\\)%.*$', re.MULTILINE)
    return com_re.sub('', tex)
def preparse_theorems(tex):
    r'''Normalise the theorem environments declared with \newtheorem.

    Every \newtheorem (starred or not) is parsed into a
    TheoremBlocks.Theorem and stored by its th_type. Then every
    \begin{th_type} becomes \begin{theorem}{th_type} and every
    \end{th_type} becomes \end{theorem}, so that a single theorem
    environment is used. The parsed theorems are saved in
    TheoremBlocks.parsed_theorems. Returns the normalised tex.
    '''
    #NOTE(review): the head of this function was corrupted in the reviewed
    #source; the \newtheorem pattern is rebuilt from the use of the
    #'star' group below. Confirm against the repository.
    th_dict = {}
    p = re.compile(r'\\newtheorem(?P<star>[*]?)')
    for match in p.finditer(tex):
        t = tex[match.end():]
        data = CommandParser.parse_options(
            t, [('th_type', '{', '}'), ('counter', '[', ']'),
                ('definition', '{', '}'), ('numberby', '[', ']')])
        data[0]['star'] = match.group('star') != ""
        the = TheoremBlocks.Theorem(**data[0])
        logging.debug('PREPARSER @ theorem: %s', the.th_type)
        th_dict[the.th_type] = the
    if th_dict:
        #a single substitution pass: replacing one key at a time would
        #rewrite again the tags already normalised when a theorem type
        #is itself called "theorem".
        names = '|'.join(re.escape(key) for key in th_dict)
        env_re = re.compile(r'\\(begin|end)\{(' + names + r')\}')

        def normalize(env_match):
            if env_match.group(1) == 'begin':
                return '\\begin{theorem}{' + env_match.group(2) + '}'
            return '\\end{theorem}'

        tex = env_re.sub(normalize, tex)
    #parsed theorems are saved in TheoremBlocks module
    TheoremBlocks.parsed_theorems = th_dict
    return tex


def _braced_argument(tex, command):
    r'''Return the brace-balanced argument of the first \command{...}.

    Nested braces are kept and escaped characters (like \}) are skipped.
    Returns None if the command is missing or its braces are not closed.
    '''
    opening = '\\' + command + '{'
    start = tex.find(opening)
    if start < 0:
        return None
    begin = start + len(opening)
    depth = 1
    i = begin
    while i < len(tex):
        char = tex[i]
        if char == '\\':
            #the next character is escaped: it can't open or close a group
            i += 2
            continue
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return tex[begin:i]
        i += 1
    return None


def preparse_header(tex):
    r'''Search title, date and author in the tex.

    Returns a dict with the keys 'title', 'date' and 'author'; a missing
    command gives an empty string. Arguments containing braces, like
    \title{The \LaTeX{} book}, are returned whole.
    '''
    headerBlock = {}
    for field in ('title', 'date', 'author'):
        value = _braced_argument(tex, field)
        headerBlock[field] = '' if value is None else value
    logging.debug('PreParser @ preparse_header ')
    logging.debug('\ntitle: %s\ndate: %s\nauthor: %s',
                  headerBlock['title'], headerBlock['date'],
                  headerBlock['author'])
    return headerBlock


def preparse_par(tex):
    r'''Replace the blank lines with \par.

    Every run of an even number of newlines becomes a single "\par "
    that the parser understands as a change of paragraph.
    '''
    #the replacement is given as a function: a template string containing
    #"\p" is rejected as a bad escape by re.sub on Python 3.7+.
    return re.sub(r'(\n\n)+', lambda _match: '\\par ', tex)


def _tex_file_path(base_path, name):
    '''Build the path of an included file, adding .tex when it is missing.'''
    if not name.endswith('.tex'):
        name += '.tex'
    #os.path.join handles an empty base_path and absolute paths
    return os.path.join(base_path, name)


def preparse_include(tex, input_path):
    r'''Replace the \input{} and \include{} commands with the files content.

    The files are searched in the directory of input_path. A missing file
    is logged as an error and its command is left untouched. The process
    is repeated until no command is expanded, so nested inclusions are
    resolved. Returns the tex with the files inserted.
    '''
    base_path = os.path.dirname(input_path)
    include_res = (re.compile(r'\\input{(.*?)}'),
                   re.compile(r'\\include{(.*?)}'))
    result_tex = tex
    while True:
        inputs_found = 0
        for include_re in include_res:
            for m in include_re.finditer(tex):
                file_name = _tex_file_path(base_path, m.group(1))
                #checking if the file exists, otherwise skipping
                if not os.path.exists(file_name):
                    logging.error("Preparser.preparse_include @ file {} not found!".
                                  format(file_name))
                    continue
                with open(file_name, 'r') as included:
                    file_tex = included.read()
                result_tex = result_tex.replace(m.group(0), file_tex)
                inputs_found += 1
        if inputs_found == 0:
            break
        #the next pass searches the expanded text without comments,
        #so that commented commands are not expanded
        tex = remove_comments(result_tex)
    return result_tex


def preparse_subfiles(tex, input_path):
    r'''Insert the content of the \subfile{} files in the tex.

    The body of the document environment of each subfile replaces the
    command; the preamble of the subfile is put at the top of the result.
    A subfile without a document environment is inserted whole. A missing
    subfile is logged as an error and its command is left untouched.
    The process is repeated until no command is expanded.
    Returns the tex with the subfiles inserted.
    '''
    base_path = os.path.dirname(input_path)
    r = re.compile(r'\\subfile{(.*?)}')
    #we have to catch the content of the doc.
    #NOTE(review): the group names were corrupted in the reviewed source;
    #'content' is the one read below, 'options' is a reconstruction.
    r_doc = re.compile(r'\\begin(?P<options>\[.*?\])?{document}' +
                       r'(?P<content>.*?)\\end{document}', re.DOTALL)
    result_tex = tex
    while True:
        inputs_found = 0
        for m in r.finditer(tex):
            file_name = _tex_file_path(base_path, m.group(1))
            if not os.path.exists(file_name):
                logging.error("Preparser.preparse_subfiles @ file {} not found!".
                              format(file_name))
                continue
            with open(file_name, 'r') as subfile:
                file_tex = subfile.read()
            mat = r_doc.search(file_tex)
            if mat is None:
                #no document environment: the whole file is the content
                result_tex = result_tex.replace(m.group(0), file_tex)
            else:
                result_tex = result_tex.replace(
                    m.group(0), mat.group('content'))
                preamble = file_tex[:mat.start()]
                result_tex = preamble + "\n" + result_tex
            inputs_found += 1
        if inputs_found == 0:
            return result_tex
        tex = result_tex
def preparse_verb(tex):
    r'''Normalise the \verb commands putting the content in a {...} pair.

    \verb can use any non-letter character as delimiter (\verb|x|,
    \verb+x+); every occurrence is rewritten as \verb{x}.
    Returns the normalised tex.
    '''
    #NOTE(review): the group names were corrupted in the reviewed source;
    #they are rebuilt from the (?P=del) back-reference and from the
    #'content' group read below.
    r = re.compile(r'\\verb(?P<del>[^a-zA-Z])(?P<content>.*?)(?P=del)')
    #a function is used as replacement so that backslashes in the content
    #are inserted literally
    return r.sub(lambda match: '\\verb{' + match.group('content') + '}', tex)


# ---- texla/Parser/TreeExplorer.py ----

class TreeExplorer:
    """
    The TreeExplorer class is a utility to navigate and extract
    information from the tree of parsed blocks.
    For example it is useful to extract the tree of the parents of a block
    for debugging reasons. It is useful also in rendering to localize
    blocks inside the document.
    It is the default container of the result of the parsing process
    because it handles the chain of blocks with several utilities
    for querying.
    """

    def __init__(self, root_block):
        """The constructor needs a root_block to begin the tree."""
        self.root_block = root_block
        #blocks by id; the root is always registered as '@'
        self.blocks = {'@': root_block}
        #blocks grouped by block_name
        self.block_names = {"default": []}
        #registering blocks by id
        self.register_blocks(root_block.ch_blocks)
        self.register_block_names()

    @staticmethod
    def create_tree_from_children(block):
        """This function populates a TreeExplorer from a children block
        using the chain of parent_block. Every parent_block should have
        the ch_blocks list populated, if not the function populates the
        list of children to have a consistent tree (it's necessary when
        the TreeExplorer is used before the end of parsing)."""
        #first of all we need the root_block
        current = block
        while current.parent_block is not None:
            parent = current.parent_block
            #populating ch_blocks of the parent block if not present
            #in order to have a fully functional TreeExplorer.
            if current not in parent.ch_blocks:
                parent.ch_blocks.append(current)
            current = parent
        #now we can return a new TreeExplorer
        #constructed from the found root.
        return TreeExplorer(current)

    def register_blocks(self, blocks):
        """This method reads recursively the given blocks and their
        children and fills the dictionary id:block."""
        for block in blocks:
            if block.id not in self.blocks:
                self.blocks[block.id] = block
            #the list of children is checked directly:
            #create_tree_from_children appends to ch_blocks by hand, so a
            #separate counter could be out of date.
            if block.ch_blocks:
                self.register_blocks(block.ch_blocks)

    def register_block_names(self):
        """This function registers the block_names, creating
        a dictionary with the blocks grouped by block_name."""
        for bl in self.blocks.values():
            #the key is the name of the block, not the block itself
            group = self.block_names.setdefault(bl.block_name, [])
            if bl not in group:
                group.append(bl)

    def update_blocks_register(self):
        """This method updates the registers of ids and names
        reading again the tree from the root_block."""
        self.register_blocks(self.root_block.ch_blocks)
        self.register_block_names()

    def get_parents_list(self, block):
        """This method returns the list of the parent blocks of the
        requested block (a block or its id), from the root down to the
        direct parent. The list of the root block is empty."""
        if isinstance(block, str):
            block = self.blocks[block]
        parents = []
        current = block
        #the chain stops at the root; it stops also if a block has no
        #parent, so a detached block doesn't raise an error
        while current != self.root_block and current.parent_block is not None:
            parents.append(current.parent_block)
            current = current.parent_block
        parents.reverse()
        return parents

    def get_parents_list_ids(self, block):
        """This method returns the ids of the parents of the block,
        in the same order of get_parents_list."""
        return [x.id for x in self.get_parents_list(block)]

    def get_block(self, blockid):
        """This method returns the block with the requested id,
        or None if the id is not registered."""
        return self.blocks.get(blockid)

    def get_number_blocks(self):
        """This method returns the value of
        root_block.n_children_blocks_total()."""
        return self.root_block.n_children_blocks_total()

    #############################################################
    # Querying functions

    def query_block_by_name(self, block_name, depth_first=False):
        """This method queries recursively the tree of blocks and
        returns a list of blocks with the requested block_name.
        depth_first controls the type of query."""
        return self.root_block.query_children_blocks(block_name, depth_first)

    #############################################################
    # Printing functions

    def print_tree(self, block, filter_list=None):
        """This method prints a beautified tree starting from the block
        parameter and its children. If filter_list is present only the
        blocks with the id in the list are printed (their children are
        visited anyway). It returns a single string."""
        output = []
        if filter_list is None or block.id in filter_list:
            lstr = ". " * (block.tree_depth + 1)
            output.append(lstr + ". " + " " + "_" * 40)
            output.append(lstr + "#" + "---" + ">|ID : {}".format(block.id))
            output.append(lstr + ". " + " |block_name : {}".
                          format(block.block_name))
            output.append(lstr + ". " + " |attributes: ")
            for at, attr in block.attributes.items():
                output.append(lstr + ". " + " | - " + "{} : {}".
                              format(at, attr))
            output.append(lstr + ". " + " |content : {}".
                          format(block.get_content()))
            output.append(lstr + ". ." + "\u203E" * 40 + "\n")
        output = "\n".join(output)
        #iterating on the block children
        for bl in block.ch_blocks:
            output += self.print_tree(bl, filter_list)
        return output

    def print_tree_to_blocks(self, blocks):
        """This method prints the tree of the parents of the list of
        blocks (or ids) passed as parameter. First of all it gets all the
        parents ids and then prints the tree using the list as filter."""
        fl = []
        for bl in blocks:
            fl += self.get_parents_list_ids(bl)
            fl.append(bl if isinstance(bl, str) else bl.id)
        return self.print_tree(self.root_block, filter_list=fl)

    def print_tree_to_block(self, block):
        """Same as print_tree_to_blocks with a single block."""
        return self.print_tree_to_blocks([block])

    def print_all_tree(self):
        """This method prints the whole tree from the root_block."""
        return self.print_tree(self.root_block)

    def print_raw_tree(self):
        """This function returns the tree of blocks
        in the raw format (json)."""
        return self.root_block.to_json()
# ---- texla/Renderers/Renderer.py ----

class Renderer():
    """Base class for Renderers.

    It collects the render hooks of the derived class (the methods
    decorated with render_hook), loads the plugins declared in the
    configuration and runs the pre hooks, the render hook and the
    post hooks for every block.
    """

    def __init__(self, configs, reporter):
        """configs is the configuration dictionary (its "plugins" list is
        read to load the plugins), reporter receives the blocks rendered
        with the default hook."""
        self.configs = configs
        self.reporter = reporter
        #Parser TreeExplorer with parsed blocks tree. It will be filled at start
        self.parser_tree_explorer = None
        #hooks dictionary
        self.render_hooks = {}
        #Read the render hooks of the Renderer.
        #It reads the hooks of the derived Renderer.
        self.parse_render_hooks()
        #plugins hooks
        self.pre_render_hooks = {}
        self.post_render_hooks = {}
        self.start_hooks = []
        self.end_hooks = []
        self.loaded_plugins = {}
        #registering plugins from the configs
        self.register_plugins()

    def register_plugins(self):
        """This function loads the plugins declared in the configuration.

        Every plugin module can expose plugin_render_hooks and
        plugin_lifecycle_hooks. If the configuration has an entry for the
        plugin in "plugins_configs", it is assigned to module.configs."""
        for plugin in self.configs["plugins"]:
            module = importlib.import_module("..plugins" + '.' + plugin, __name__)
            #every imported plugin is stored, so that also the plugins
            #with lifecycle hooks only can receive the TreeExplorer
            #in start_rendering.
            self.loaded_plugins[plugin] = module
            if hasattr(module, "plugin_render_hooks"):
                self.register_render_plugin_hooks(module.plugin_render_hooks)
                logging.debug("Renderer.register_plugins "
                              "@ Loaded plugin: {}".format(plugin))
                logging.debug("Plugin {} render hooks: {}".format(
                    plugin, list(module.plugin_render_hooks.keys())))
            if hasattr(module, "plugin_lifecycle_hooks"):
                self.register_lifecyle_plugin_hooks(module.plugin_lifecycle_hooks)
                logging.debug("Plugin {} lifecycle hooks: {}".format(
                    plugin, list(module.plugin_lifecycle_hooks.keys())))
            #adding the configurations to the plugin
            if "plugins_configs" in self.configs:
                if plugin in self.configs["plugins_configs"]:
                    logging.debug("Plugin {} passing configs...".format(plugin))
                    module.configs = self.configs["plugins_configs"][plugin]

    def register_render_plugin_hooks(self, hooks):
        '''This function registers the hooks for renderer plugins.
        hooks is a dictionary block_name -> {"pre": hook, "post": hook},
        both keys are optional.
        The pre hook receives the block before the rendering and can
        only modify the block itself. The post hook receives the block
        and the text from the renderer: it has to return the final text.
        The keyword ALL creates a hook for all the blocks. Note that it is
        always called after all the other hooks.'''
        #the ALL keyword is a key like the others: it is registered once
        #under "ALL" and render_block calls it for every block.
        for bl in hooks:
            if "pre" in hooks[bl]:
                self.register_pre_renderer_hook(bl, hooks[bl]["pre"])
            if "post" in hooks[bl]:
                self.register_post_renderer_hook(bl, hooks[bl]["post"])

    def register_lifecyle_plugin_hooks(self, hooks):
        '''This function registers the hooks for the renderer lifecycle.
        Plugins can register hooks for the start and end actions.
        Both hooks are called without arguments (see start_rendering and
        end_rendering). These hooks must be used only to signal the
        actions to the plugins.'''
        if "start" in hooks:
            self.register_start_hook(hooks["start"])
        if "end" in hooks:
            self.register_end_hook(hooks["end"])

    def register_pre_renderer_hook(self, block, hook):
        '''Adds a pre render hook for the block name.'''
        self.pre_render_hooks.setdefault(block, []).append(hook)

    def register_post_renderer_hook(self, block, hook):
        '''Adds a post render hook for the block name.'''
        self.post_render_hooks.setdefault(block, []).append(hook)

    def register_start_hook(self, hook):
        '''Adds a hook called by start_rendering.'''
        self.start_hooks.append(hook)

    def register_end_hook(self, hook):
        '''Adds a hook called by end_rendering.'''
        self.end_hooks.append(hook)

    def parse_render_hooks(self):
        """The function scans the Renderer (sub)class to find the functions
        annotated with @render_hook(list_of_block_names).
        It inserts the function in the render_hooks using the
        provided block_names."""
        for member_name in dir(self):
            member = getattr(self, member_name)
            if hasattr(member, "block_names"):
                for hook in getattr(member, "block_names"):
                    logging.debug("Renderer @ render_hook registered: {} -> {}"
                                  .format(hook, member_name))
                    self.render_hooks[hook] = member

    def start_rendering(self, parser_tree_explorer):
        '''Entry point for the rendering process. This function receives
        the TreeExplorer containing the parsed blocks and passes it
        to the plugins that have the variable
        needs_tree_explorer=True. Then it starts the plugins.
        It doesn't start the real processing, the specific Renderer can
        start the chain using render_block()'''
        self.parser_tree_explorer = parser_tree_explorer
        #passing the tree_explorer
        for pl in self.loaded_plugins.values():
            if getattr(pl, "needs_tree_explorer", False):
                logging.debug("Renderer @ Inserting "
                              "TreeExplorer into plugin {}".format(pl))
                pl.tree_explorer = self.parser_tree_explorer
        #starting the plugins
        for hook in self.start_hooks:
            hook()

    def end_rendering(self):
        '''Calls the end hooks of the plugins.'''
        for hook in self.end_hooks:
            hook()

    def _render_all(self, blocks, collapse):
        '''Renders the blocks in order and returns a single string if
        collapse is True, otherwise a list of (block_name, output).'''
        output = [(bl.block_name, self.render_block(bl)) for bl in blocks]
        if collapse:
            return ''.join([x[1] for x in output])
        return output

    def render_children_blocks(self, block, collapse=True):
        '''This is one of the most important functions of the rendering
        process. This function takes all the children blocks of
        a block and gets their rendering output.
        If collapse=True it returns a single string,
        otherwise it returns a list of tuples [(block_name, output)]'''
        #it's not necessary checking for the render hook
        #because the default hook is mandatory
        return self._render_all(block.ch_blocks, collapse)

    def render_block(self, bl):
        '''This function calls the right render_hook for the block.
        If there isn't a hook it calls the default, that is mandatory.
        Returns the output text after the post hooks.'''
        ######### pre hooks ############
        #hooks executed in the order of insertion.
        #They receive the block and they can only modify the block object.
        #The ALL hooks are called after the others.
        for prehook in self.pre_render_hooks.get(bl.block_name, []):
            prehook(bl)
        for prehook in self.pre_render_hooks.get("ALL", []):
            prehook(bl)
        ######## rendering #############
        if bl.block_name in self.render_hooks:
            logging.debug('Render @ block: ' + bl.block_name)
            output = self.render_hooks[bl.block_name](bl)
        else:
            #default hook is mandatory
            logging.debug('Render @ block: default@' + bl.block_name)
            #reporting to the Reporter
            if bl.block_name != "default":
                self.reporter.add_not_rendered_block(bl)
            output = self.render_hooks['default'](bl)
        ######## post hooks ###########
        #hooks executed in the order of insertion.
        #They receive the block and the text. They have to return the
        #output text, that is passed to the next posthook.
        #The ALL hooks are called after the others.
        for posthook in self.post_render_hooks.get(bl.block_name, []):
            output = posthook(bl, output)
        for posthook in self.post_render_hooks.get("ALL", []):
            output = posthook(bl, output)
        #final output
        return output

    def render_blocks(self, bls, collapse=False):
        '''This function renders a list of blocks. It's the same as
        render_children_blocks but with a generic list
        (and collapse=False as default).'''
        return self._render_all(bls, collapse)


###############################################################################
# Decorators for renderers

def render_hook(*block_names):
    """This decorator assigns to a function the list of block_names
    that it will handle as a render_hook."""
    def decorate(func):
        #adding the list of block names as an attribute of the function;
        #wraps copies it to the wrapper with the other metadata
        setattr(func, "block_names", block_names)

        @wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)
        return wrapper
    return decorate