diff --git a/.gitignore b/.gitignore index 9fcf7b6..6ab3cd7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,7 @@ *.pyc .directory __pycache__ configs.yaml test/ #import/ -!/sandbox/.placeholder -sandbox/* - +!/debug/.placeholder diff --git a/texla/Parser/PreParser.py b/texla/Parser/PreParser.py index 1a86307..aba01d8 100644 --- a/texla/Parser/PreParser.py +++ b/texla/Parser/PreParser.py @@ -1,311 +1,311 @@ import logging import os import re from collections import deque from os import path from ..Exceptions.TexlaExceptions import * from .Blocks import TheoremBlocks from .Blocks.Utilities import * data = {} def preparse(tex, input_path): ''' Entrypoint for preparsing of tex ''' try: logging.info('\033[0;34m############### STARTING PRE-PARSING ###############\033[0m') tex = check_doc_integrity(tex) logging.info("PreParser @ Removing comments") tex = remove_comments(tex) logging.info("PreParser @ Preparsing include and subfiles") tex = preparse_include(tex, input_path) tex = preparse_subfiles(tex, input_path) logging.info("PreParser @ Removing comments...again") tex = remove_comments(tex) logging.info("PreParser @ Parsing macros") tex = parse_macros(tex) logging.info("PreParser @ Preparsing Theorems") tex = preparse_theorems(tex) logging.info("PreParser @ Preparsing par (\\n\\n)") tex = preparse_par(tex) logging.info("PreParser @ Preparsing verb") tex = preparse_verb(tex) logging.info("PreParser @ Preparsing header info (title, author, date)") data = preparse_header(tex) #saving preparsed tex - log_preparsed_file_path = path.relpath('sandbox/preparsed.tex') + log_preparsed_file_path = path.relpath('debug/preparsed.tex') with open(log_preparsed_file_path, 'w') as o: o.write(tex) return tex, data except PreparserError as err: err.print_error() exit() def check_doc_integrity(tex): '''checking if the source has \begin{document}''' if not ("\\begin{document}" in tex): tex = "\\begin{document}"+tex+ "\\end{document}" return tex def parse_macros(tex): ''' Preparsing of 
macros: \newcommand are searched and macro objects created. Then the occurrences of the macro are replaced with the tex from the macro. ''' #regex for newcommand new_re = re.compile(r'\\(re)?newcommand[*]?|\\providecommand[*]?') macros = {} log = {} tex_to_parse = tex[:] for match in new_re.finditer(tex): #first we get the options opt_tex = CommandParser.get_command_options(tex[match.end():]) macro = MacroParser.Macro.parse_macro(opt_tex[0]) macros[macro.name] = macro log[macro.name] = 0 tex_to_parse = tex_to_parse.replace( tex[match.start():match.end() + opt_tex[2]], '') #now we can search for occurrence of the macro, #get the options, and replace the tex macros_found = 0 #we reiterate the process until no macros are found. #This is useful for macros using other macros. while True: for m in macros: logging.debug("preparsing MACRO: %s", m) #the macro name is \\name, but it's not #raw: we have to add a \\ in front of it. #we have to create the string for regex #to fix the special characters problem m_r = ''.join(['['+x+']' for x in m[1:]]) cmd_re = re.compile(r'\\' + m_r + r'(?![a-zA-Z])') #lists for positions and replacements pos = [] replaces = [] #first of all we get all the occurrence #of a macro and its replacement tex. for cmd_ma in cmd_re.finditer(tex_to_parse): #little hack to check if we are getting #a macro inside the previous matched macro content. if len(pos)>0 and (cmd_ma.start() < pos[len(pos)-1]): continue log[m] += 1 macros_found += 1 #we get command complete tex cmd_tex = CommandParser.get_command_options(tex_to_parse[ cmd_ma.end():]) #cmd_tex contains also the index of the star of #tex after the macro. We need it later. 
#we get parenthesis parenthesis = CommandParser.get_parenthesis(cmd_tex[0]) if parenthesis[0][0] == '[': param_default = parenthesis[0][1] parenthesis.pop(0) else: param_default = None params = [parenthesis[i][1] for i in range(len(parenthesis) - 1)] #asking the tex to the macro replace_tex = macros[m].get_tex(params, param_default) #adding a space for safety is necessary if cmd_tex[1][0] not in ["\\", " ", '_', '^', '[', '(','{',"'"]: replace_tex = replace_tex + " " #saving data replaces.append(replace_tex) pos+= [cmd_ma.start(), cmd_ma.end()+cmd_tex[2]] #now that we have all macros we can replace them if (len(pos) ==0): continue preparsed_result = [tex_to_parse[:pos[0]]] repl_queu = deque(replaces) for x in range(1, len(pos)-1, 2): preparsed_result.append(repl_queu.popleft()) preparsed_result.append(tex_to_parse[pos[x]:pos[x+1]]) #we have to add the last piece preparsed_result.append(repl_queu.popleft()) preparsed_result.append(tex_to_parse[pos[len(pos)-1]:]) preparsed_tex = ''.join(preparsed_result) #for the next macro we have to reset tex_to_parse tex_to_parse = preparsed_tex #at the end of the all the macros we check if #we have found something if macros_found > 0: macros_found = 0 #the cycle continues else: break #logging for m in log: logging.debug('PREPARSER @ macro: %s, %s occurrences', m, log[m]) return tex_to_parse def remove_comments(tex): ''' This function removes comments from the tex. 
''' com_re = re.compile(r'(?<!\\)%.*')
    # NOTE(review): everything between the "(?<" of this lookbehind and the
    # "(?P<star>" in preparse_theorems' regex below was eaten when angle-bracketed
    # spans were stripped from the patch text. The remove_comments body and the
    # preparse_theorems header/docstring are reconstructed from context (the loop
    # below uses `p` and reads match.group('star')) — verify against upstream texla.
    return com_re.sub('', tex)

def preparse_theorems(tex):
    '''Parses \\newtheorem commands and prepares theorem environments
    (header reconstructed — see NOTE above).'''
    th_dict = {}
    p = re.compile(r'\\newtheorem(?P<star>[*]?)')
    for match in p.finditer(tex): t = tex[match.end():] data = CommandParser.parse_options( t, [('th_type', '{', '}'), ('counter', '[', ']'), ('definition', '{', '}'), ('numberby', '[', ']')]) if match.group('star') != "": data[0]['star'] = True else: data[0]['star'] = False if data[0]['definition'] is None: pass the = TheoremBlocks.Theorem(**data[0]) logging.debug('PREPARSER @ theorem: %s', the.th_type) th_dict[the.th_type] = the #now we search for every theorem \beging{th_id} and \end{th_id} #and we substitue them with \begin{theorem}{th_id} and \begin{theorem} #to use out theorem environment for key in th_dict: tag_open = '\\begin{' + key + '}' new_tag_open = '\\begin{theorem}{' + key + '}' tex = tex.replace(tag_open, new_tag_open) tag_close = '\\end{' + key + '}' new_tag_close = '\\end{theorem}' tex = tex.replace(tag_close, new_tag_close) #parsed theorems are saved in TheoremBlocks moduile TheoremBlocks.parsed_theorems = th_dict return tex def preparse_header(tex): '''Function that searches title, date, author in preamble of the tex ''' headerBlock = {} headerBlock['title'] = '' headerBlock['date'] = '' headerBlock['author'] = '' mat = re.search(r'\\title{(.*?)}', tex) if mat: headerBlock['title'] = mat.group(1) mat = re.search(r'\\date{(.*?)}', tex) if mat: headerBlock['date'] = mat.group(1) mat = re.search(r'\\author{(.*?)}', tex) if mat: headerBlock['author'] = mat.group(1) logging.debug('PreParser @ preparse_header ') logging.debug('\ntitle: %s\ndate: '\ '%s\nauthor: %s', headerBlock['title'], headerBlock['date'], headerBlock['author']) return headerBlock def preparse_par(tex): '''This function replace \n\n with a \\par to show that there's a change of par, understendable by the parser. It replaces even occurrences of \n\n''' return re.sub(r'(\n\n)+','\\par ',tex) def preparse_include(tex,input_path): ''' This function replace \input{} commands with the content of the files.
''' base_path = os.path.dirname(input_path) r1 = re.compile(r'\\input{(.*?)}') r2 = re.compile(r'\\include{(.*?)}') inputs_found = 0 result_tex = tex while True: for m in r1.finditer(tex): name = m.group(1) + '.tex' #reading file file_name = base_path + "/"+ name #checking if the file exits, otherwise skipping if not os.path.exists(file_name): logging.error("Preparser.prepase_include @ file {} not found!". format(file_name)) continue file_tex = open(file_name, 'r').read() result_tex = result_tex.replace( m.group(0), file_tex) inputs_found+=1 for m2 in r2.finditer(tex): name = m2.group(1) + '.tex' #reading file file_name =os.getcwd()+ '/'+\ base_path + "/"+ name #checking if the file exits, otherwise skipping if not os.path.exists(file_name): logging.error("Preparser.repase_include @ file {} not found!". format(file_name)) continue file_tex = open(file_name, 'r').read() result_tex = result_tex.replace( m2.group(0), file_tex) inputs_found+=1 if inputs_found>0: tex = result_tex #removing comments tex = remove_comments(tex) inputs_found = 0 else: break return result_tex def preparse_subfiles(tex, input_path): '''This preparse function insert the subfiles in the tex''' base_path = os.path.dirname(input_path) r = re.compile(r'\\subfile{(.*?)}') inputs_found = 0 result_tex = tex while True: for m in r.finditer(tex): name = m.group(1) + '.tex' #reading file file_name = base_path + "/"+ name file_tex = open(file_name, 'r').read() #we have to catch the content of the doc r_doc = re.compile(r'\\begin(?P<options>\[.*?\])?{document}'+ r'(?P<content>.*?)\\end{document}', re.DOTALL) mat = r_doc.search(file_tex) content = mat.group('content') result_tex = result_tex.replace( m.group(0), content) preamble = file_tex[:mat.start()] result_tex = preamble + "\n" + result_tex inputs_found+=1 if (inputs_found>0): tex = result_tex inputs_found = 0 else: return result_tex def preparse_verb(tex): '''This function preparse verbatim putting the content in a {...} pair''' r =
re.compile(r'\\verb(?P<del>[^a-zA-Z])(?P<content>.*?)(?P=del)') result = tex for match in r.finditer(tex): c = match.group("content") result = result.replace(match.group(0), '\\verb{'+c +'}') return result diff --git a/texla/Renderers/plugins/math_check_online.py b/texla/Renderers/plugins/math_check_online.py index 7b56fa0..219e312 100644 --- a/texla/Renderers/plugins/math_check_online.py +++ b/texla/Renderers/plugins/math_check_online.py @@ -1,110 +1,110 @@ import requests import logging from multiprocessing import Process, Pool, Queue from os import path def request_formula(tex): url_check = 'http://restbase.{0}/pool.{0}/v1/media/math/check/tex'.format(configs["domain"]) header = { 'Accept': 'application/json', 'Content-Type': 'application/x-www-form-urlencoded' } payload = {'q': tex} r = requests.post(url_check, data=payload, headers=header) if r.status_code == 200: return True else: return False formulas = [] bad_formulas = Queue() def save_math(block): tex = block.attributes["content"] #saving the formula only if it's longer than 10 if len(tex) > 5: logging.debug("Plugin math_check_online @ saving formula {}".format( tex)) formulas.append((tex, block.id)) def save_math_align(block): tex = "\\begin{align}" + block.attributes["content"] + "\\end{align}" #saving the formula only if it's longer than 10 if len(tex) > 50: logging.debug("Plugin math_check_online @ saving formula {}".format( tex)) formulas.append((tex, block.id)) def check_math(formula): ok = request_formula(formula[0]) if ok: logging.info("Plugin math_check_online @ formula {}, block_id: {}". format("OK", formula[1])) else: logging.error("Plugin math_check_online @ formula {}, block_id: {}". format("BAD", formula[1])) bad_formulas.put(formula) def start_pool(): pool = Pool(processes=int(configs["threads"])) logging.info("Plugin math_check_online @ total formulas to check: {}".
format(len(formulas))) pool.map(check_math, formulas) #saving results logging.info("GOOD FORMULAS: {} --- BAD FORMULAS: {}".format( len(formulas)-bad_formulas.qsize(), bad_formulas.qsize())) - log_matherrors_file_path = path.relpath("sandbox/math_errors.txt") + log_matherrors_file_path = path.relpath("debug/math_errors.txt") with open(log_matherrors_file_path, "w") as f: f.write("Math Errors Tree Log: \n") f.write("---------------------\n") ids = [] while not bad_formulas.empty(): form = bad_formulas.get() ids.append(form[1]) output = tree_explorer.print_tree_to_blocks(ids) f.write(output + "\n\n") def start_check(): p = Process(target=start_pool) p.start() plugin_render_hooks = { 'displaymath': { "pre": save_math }, 'inlinemath': { "pre": save_math }, 'ensuremath': { "pre": save_math }, 'equation': { "pre": save_math }, 'eqnarray': { "pre": save_math_align }, 'multline': { "pre": save_math_align }, 'align': { "pre": save_math_align }, 'alignat': { "pre": save_math_align } } plugin_lifecycle_hooks = {"end": start_check} needs_tree_explorer = True tree_explorer = None configs = {} diff --git a/texla/Reporter.py b/texla/Reporter.py index 1ea8609..188c8d8 100644 --- a/texla/Reporter.py +++ b/texla/Reporter.py @@ -1,52 +1,52 @@ from .Parser.TreeExplorer import TreeExplorer import logging class Reporter: def __init__(self, tree): self.tree_explorer = tree #registering the block_names self.tree_explorer.register_block_names() self.not_parsed_blocks = self.tree_explorer.block_names["default"] self.not_rendered_blocks = [] self.not_parsed_types = {} self.not_rendered_types = {} for bl in self.not_parsed_blocks: if bl.type not in self.not_parsed_types: self.not_parsed_types[bl.type] = [] self.not_parsed_types[bl.type].append(bl) def add_not_rendered_block(self, block): """This method saves a block that is not rendered by the Renderer.""" self.not_rendered_blocks.append(block) if not block.block_name in self.not_rendered_types: self.not_rendered_types[block.block_name] = [] 
self.not_rendered_types[block.block_name].append(block) def print_report(self, console=True): logging.info('\033[0;34m############### TEXLA REPORT ###############\033[0m') s = [] s.append("\n- NOT PARSED blocks:") for bl, v in self.not_parsed_types.items(): s.append("\t- {} : {}".format(bl, len(v))) s.append("\n- NOT RENDERED blocks:") for bl, v in self.not_rendered_types.items(): s.append("\t- {} : {}".format(bl, len(v))) text= "\n".join(s) if console: logging.info(text) #saving to file also the block trees - with open("sandbox/texla_report.txt",'w') as file: + with open("debug/texla_report.txt",'w') as file: t = ["############### TEXLA REPORT ###############"] t.append("\n- NOT PARSED blocks:") for bl, v in self.not_parsed_types.items(): t.append("\t- {} : {}".format(bl, len(v))) t.append("\n- NOT PARSED blocks details:") t.append(self.tree_explorer.print_tree_to_blocks(self.not_parsed_blocks)) t.append("\n- NOT RENDERED blocks:") for bl, v in self.not_rendered_types.items(): t.append("\t- {} : {}".format(bl, len(v))) t.append("\n- NOT RENDERED blocks details:") t.append(self.tree_explorer.print_tree_to_blocks(self.not_rendered_blocks)) file.write("\n".join(t))