class PageTree():
    '''Tree of Page objects built while a document is processed.

    The tree keeps a flat index of every page (self.pages), the titles
    and labels seen so far, and a stack of page ids describing the
    page that is currently being filled.'''

    def __init__(self, configs):
        '''Create the tree and its root page.

        configs is a dictionary; the keys 'doc_title', 'keywords' and
        'output_path' are read here, 'base_path' is read later by
        collapse_urls and create_book_index.'''
        self.configs = configs
        self.doc_title = configs['doc_title']
        self.keywords = configs['keywords']
        self.output_path = configs['output_path']
        #pages data {id: page}
        self.pages = {}
        #{id: title}
        self.titles = {}
        #{label: id}
        self.labels = {}
        #{id: url}, meant to be filled after collapsing
        self.urls = {}
        #ROOT PAGE
        ro = Page(self.doc_title, 'root', -1, self.keywords)
        self.root_id = ro.id
        self.root_page = ro
        self.pages[self.root_id] = ro
        self.titles[self.root_id] = ro.title
        #indexes: the stack always starts with the root page
        self.pageid_stack = [ro.id]
        self.current_id = self.root_id
        self.current_anchor = self.root_id

    def createPage(self, title, page_type):
        '''Create a new page as a subpage of the current page and
        make it the current page.

        The title is normalized with get_normalized_title before the
        page is created.'''
        title = self.get_normalized_title(title)
        #the root is at depth 0 of the stack, so its direct
        #children get level 0
        level = len(self.pageid_stack) - 1
        p = Page(title, page_type, level, self.keywords)
        #add page to the indexes
        self.pages[p.id] = p
        self.titles[p.id] = p.title
        #adding the page as subpage of the current page
        self.pages[self.current_id].addSubpage(p)
        #the new page becomes the current one
        self.pageid_stack.append(p.id)
        self.current_id = p.id
        self.current_anchor = p.id

    def exitPage(self):
        '''Return to the parent page environment.

        Raises IndexError if only the root page is on the stack.'''
        self.current_id = self.pageid_stack[-2]
        self.pageid_stack.pop()
        self.current_anchor = self.current_id

    def addText(self, text):
        '''Append text to the current page.'''
        self.pages[self.current_id].addText(text)

    def addLabel(self, label):
        '''Bind label to the current anchor (a page id).'''
        self.labels[label] = self.current_anchor

    def getRef(self, label):
        '''Return the url stored in self.urls for the page the label
        points to. Raises KeyError for an unknown label or a page
        without a registered url.'''
        return self.urls[self.labels[label]]

    @staticmethod
    def get_normalized_title(title):
        '''Return title without LaTeX math markup.

        The commands \\mathcal, \\mathbf, \\mathbb and \\ensuremath are
        dropped entirely, then every '$', '{', '}' and backslash is
        removed. Other commands only lose their backslash
        (e.g. '\\frac{1}{2}' becomes 'frac12').'''
        #The command names must be removed BEFORE the bare backslash:
        #once the backslashes are gone the commands cannot be
        #recognized anymore and their names would leak into the title.
        for command in ('\\mathcal', '\\mathbf', '\\mathbb',
                        '\\ensuremath'):
            title = title.replace(command, '')
        for symbol in ('$', '{', '}', '\\'):
            title = title.replace(symbol, '')
        return title

    def get_tree_json(self):
        '''Return the json dictionary of the tree, as built by the
        root page.'''
        return self.root_page.get_json_dictionary(self.pages)

    def get_tree_debug(self):
        '''Return a debug string with one entry per subpage of the
        root page, separated by newlines.'''
        return '\n'.join(p.get_str()
                         for p in self.root_page.get_subpages())

    def after_render(self):
        '''Let every page apply its own fixes after rendering.'''
        for page in self.pages.values():
            page.after_render()

    def change_title(self, page_id, title):
        '''Set the title of the page with the given id.

        Only the Page object is updated; self.titles is not touched.'''
        self.pages[page_id].title = title

    def remove_page_from_tree(self, page, parent=None):
        '''Remove a page from the tree without deleting it.

        The page remains in the self.pages dictionary but is removed
        from the subpages in the tree. If a parent page is passed the
        removal starts from that page, otherwise from the root page.'''
        if parent:
            parent.removeSubpage(page)
        else:
            self.root_page.removeSubpage(page)

    def collapse_tree(self, content_level, max_page_level):
        '''Run all the tree collapsing procedures in this order:
        subpages content collapsing, subpages level collapsing,
        url collapsing, references fixing.'''
        self.collapse_content_level(content_level)
        self.collapse_page_level(max_page_level)
        self.collapse_urls()
        self.fix_references()

    def collapse_content_level(self, max_level):
        '''Collapse the content of the pages at the chosen level.

        Page.collapseSubpages is called on every page whose level is
        max_level; it is expected to move the content of the deeper
        pages into that page and to mark them as collapsed.'''
        for p in self.pages.values():
            if p.level == max_level:
                p.collapseSubpages()

    def collapse_page_level(self, max_level):
        '''Fix the level of the pages in the index according to
        max_level.

        Not collapsed pages below a max_level page are moved up in
        the tree and attached to the parent of that page. Moreover
        level=0 is a special level: the text of those pages is moved
        to a new intro page, because level=0 pages must contain the
        index of their subpages.'''
        #PAGES LEVEL = 0
        #If they contain text we have to create a new page
        #called introduction (localized).
        #The list is built first because self.pages is modified
        #inside the loop.
        for p in [x for x in self.pages.values() if x.level == 0]:
            if len(p.text) > 0:
                #creating the new page for the text of the section
                p_intro = Page(self.keywords['intro'], 'section', 1,
                               self.keywords)
                p_intro.text = p.text
                #saving the intro page
                self.pages[p_intro.id] = p_intro
                self.titles[p_intro.id] = p_intro.title
                p.addSubpage_top(p_intro)
                #erasing text from the section page
                p.text = ''
                #we don't need to fix labels for now
        #Now we move pages according to max_level: not collapsed
        #pages under a max_level page are detached from their parent
        #and handed over to the parent of the max_level page.
        for p in [x for x in self.pages.values()
                  if x.level == max_level]:
            parent_page = p.parent
            #list of subpages to move at the right level
            subpages_to_add = []
            for sp in p.get_subpages():
                if not sp.collapsed:
                    #removing the page from the tree acting
                    #directly on its parent page
                    sp.parent.removeSubpage(sp)
                    #saving the page for the movement
                    subpages_to_add.append(sp)
            #NB: the level of the added subpages is expected to be
            #refreshed by addSubpages itself.
            parent_page.addSubpages(subpages_to_add, p)

    def collapse_urls(self):
        '''Create the urls of the pages, starting from the root page
        and the configured base_path.'''
        self.root_page.collapseURL(self.configs['base_path'])

    def fix_references(self):
        '''Ask every page to fix the references inside its text,
        giving it the labels and the pages index.'''
        for page in self.pages.values():
            page.fixReferences(self.labels, self.pages)

    def create_indexes(self):
        '''Create the sections indexes and the book total index.'''
        self.create_sections_index()
        self.create_book_index()

    def create_sections_index(self):
        '''Replace the text of every section (level=0) page with the
        index of its not collapsed subpages.

        Subpages with text become wiki links, subpages without text
        are listed by title only.'''
        for page in self.pages.values():
            if page.level == 0:
                index = []
                for p in page.get_subpages():
                    if not p.collapsed:
                        if len(p.text) > 0:
                            index.append('*' * p.level +
                                         '[[' + p.url + '|' +
                                         p.title + ']]')
                        else:
                            index.append('*' * p.level + p.title)
                page.text = '\n'.join(index)

    def create_book_index(self):
        '''Create the book total index on the root page and the book
        export page.

        The export page is stored in self.pages under the key
        'Project:Libri/' + doc_title.'''
        base_page = self.root_page
        #book export: link
        book_url = self.doc_title.replace(' ', '_')
        base_page.text += '{{libro|Project:Libri/' + book_url + \
            '|' + self.doc_title + '}}\n'
        #creating root index
        index = []
        book_export_index = [
            '{{libro_salvato | setting-papersize = a4'
            ' | setting-toc = auto | setting-columns = 1}}']
        #book export: setting title
        book_export_index.append('==' + self.doc_title + '==')
        for page in self.pages.values():
            if page.level == 0:
                index.append('{{Section\n|sectionTitle=')
                index.append(page.title + '\n')
                index.append('|sectionText=\n')
                #transcluding index for section
                index.append('{{:' + page.url + '}}')
                #book export index for chapters
                book_export_index.append(';' + page.title)
                #creating index for book
                for p in page.get_subpages():
                    if not p.collapsed:
                        if len(p.text) > 0:
                            book_export_index.append(
                                ':[[' + p.url + '|' + p.title + ']]')
                #closing section
                index.append('}}\n{{ForceBreak}}\n')
        base_page.text += '\n'.join(index)
        #creating book export page
        book_title = 'Project:Libri_' + book_url
        book_export_page = Page(book_title, 'root', -1, None)
        book_export_page.url = self.configs['base_path'] + \
            'Project:Libri/' + self.doc_title
        #inserting index text
        book_export_page.addText(u'\n'.join(book_export_index))
        #the export book page is inserted in the pages dict and index
        self.pages['Project:Libri/' + self.doc_title] = book_export_page
({'opt1':'option1','opt2':'option2', 'opt3':None},' other text')) def test_grammar_nooptions(self): self.assertEqual(CommandParser.parse_options(' text',[]), ({},' text')) def test_grammar_envinside(self): a = '{test $x=y$ text \\begin{align*}A'+\ '\end{align*} text}' grammar = [('content','{','}')] self.assertEqual(CommandParser.parse_options(a,grammar), ({'content':'test $x=y$ text \\begin{align*}A'+\ '\end{align*} text'},'')) def test_grammar_get_param_with_env(self): a = '{test $x=y$ text \\begin{align*}A'+\ '\end{align*} text}' grammar = [('content','{','}')] self.assertEqual( CommandParser.parse_options(a,grammar)[0]['content'], 'test $x=y$ text \\begin{align*}A'+\ '\end{align*} text') def test_get_parenthesis_nested(self): a = '[a][b]{abc\command{}[]}[d]{boh[]} tests' self.assertEqual(CommandParser.get_parenthesis(a)[:-1], [('[','a',']'),('[','b',']'),('{','abc\command{}[]','}'), ('[','d',']'),('{','boh[]','}')]) self.assertEqual(CommandParser.get_parenthesis(a)[-1:], [('out',' tests','')]) def test_get_parenthesis_env(self): a = '{test $x=y$ text \\begin{align*}A'+\ '\end{align*} text}' self.assertEqual(CommandParser.get_parenthesis(a), [('{','test $x=y$ text \\begin{align*}A'+\ '\end{align*} text','}'),('out','','')]) def test_get_parenthesis_near_command(self): a = '[a][b]{abc\command{}[]}[d]{boh[]}\cmd2{tests}' self.assertEqual(CommandParser.get_parenthesis(a)[:-1], [('[','a',']'),('[','b',']'),('{','abc\command{}[]','}'), ('[','d',']'),('{','boh[]','}')]) self.assertEqual(CommandParser.get_parenthesis(a)[-1:], [('out','\cmd2{tests}','')]) def test_get_command_greedy(self): a = '\command[a][b]{abc\emph{}[]}[d]{boh[]}\cmd2{tests}' self.assertEqual(CommandParser.get_command_greedy(a), ('command','\command[a][b]{abc\emph{}[]}[d]{boh[]}', '\cmd2{tests}',38)) def test_get_command_options(self): a = '[boh]{test{test}}[{boh}a] left tex' self.assertEqual(CommandParser.get_command_options(a), ('[boh]{test{test}}[{boh}a]', ' left tex',25)) class 
class RegexTest(unittest.TestCase):
    '''Checks the regular expression used to split text on
    \\section commands.'''

    def test_section_regex(self):
        '''A \\section not followed by { or [ is not a split point.'''
        pattern = re.compile(
            r'\\section'
            r'(?:[*])?(?: *)'
            r'(?=[\{\[])')
        text = ('text tex \\section* {} text '
                '\\section text')
        pieces = pattern.split(text)
        self.assertEqual(pieces,
                         ['text tex ', '{} text \\section text'])

    def test_section_regex2(self):
        '''Both the starred and the [..] forms are split points.'''
        pattern = re.compile(
            r'\\section'
            r'(?:[*])?(?: *)'
            r'(?=[\{\[])')
        text = ('text tex \\section* {} text '
                '\\section[] text')
        pieces = pattern.split(text)
        self.assertEqual(pieces,
                         ['text tex ', '{} text ', '[] text'])
class TitleTest(unittest.TestCase):
    '''Checks for PageTree.get_normalized_title.'''

    def test_title_normalizaton(self):
        '''Math delimiters, braces and backslashes are stripped
        from the title.'''
        raw_title = 'title $math \\frac{1}{2}$'
        self.assertEqual(
            pt.PageTree.get_normalized_title(raw_title),
            'title math frac12')