Spaces:
Runtime error
Runtime error
'''
Convert Google Code .wiki files into .tex formatted files.
Output is designed to be included within a larger TeX project; it is
not standalone.
'''
import sys | |
import re | |
import codecs | |
# Echo the raw command-line arguments; this runs whenever the module is loaded.
print(sys.argv)
'''
A "rule" is a three-item list: a begin tag, an end tag, and a function that
reformats the text found between the two tags.
'''
def encase(pre, post, strip=False):
    """Build a formatter that wraps text between ``pre`` and ``post``.

    When ``strip`` is true, surrounding whitespace is removed from the text
    before it is wrapped.
    """
    def wrap(body):
        inner = body.strip() if strip else body
        return pre + inner + post
    return wrap
def constant(text):
    """Build a formatter that ignores its input and always yields ``text``."""
    return lambda _ignored: text
def encase_with_rules(pre, post, rules, strip=False):
    """Build a formatter that rewrites text with ``rules`` and then wraps it.

    The text is first passed through ``apply_rules`` with the given rule
    list. If ``strip`` is true, surrounding whitespace is removed from the
    converted text. The result is placed between ``pre`` and ``post``.
    """
    def wrap(body):
        converted = apply_rules(body, rules)
        if strip:
            converted = converted.strip()
        return pre + converted + post
    return wrap
def encase_escape_underscore(pre, post):
    """Build a formatter that escapes underscores and wraps the result.

    Every ``_`` in the text becomes ``\\_`` and the escaped text is placed
    between ``pre`` and ``post``.
    """
    def f(txt):
        # A literal one-character replacement needs no regex; str.replace
        # also avoids relying on how re treats the "\_" escape in a
        # replacement template.
        return pre + txt.replace('_', r'\_') + post
    return f
def sub(pat, repl, txt):
    """Replace every match of the regex ``pat`` in ``txt`` with ``repl``.

    The pattern is applied in multi-line mode, so ``^`` and ``$`` anchor at
    each line of ``txt`` rather than only at the ends of the whole string.
    """
    return re.sub(pat, repl, txt, flags=re.MULTILINE)
def process_list(rules):
    """Build a formatter that turns a wiki bullet list into an itemize block.

    The returned function receives the list text with its first " *" marker
    already consumed as the begin tag, puts that marker back, and converts
    each line: a leading " *" becomes a LaTeX item marker and the line is
    then run through ``apply_rules`` with ``rules``.
    """
    def convert(body):
        # The begin tag of the list rule swallowed the first bullet marker.
        lines = (' *' + body).split('\n')
        items = [
            apply_rules(sub(r'^ \*', r'\\item ', line), rules) + '\n'
            for line in lines
        ]
        return '\\begin{itemize}\n' + ''.join(items) + '\\end{itemize}\n'
    return convert
def process_link(rules):
    r"""Build a formatter that turns wiki link text into a LaTeX ``\href``.

    The returned function receives the text found between ``[`` and ``]``.
    The first space-separated word is the link target; everything after the
    first space is the description, which is converted with ``apply_rules``
    using ``rules``.

    * Target starting with ``http://`` or ``https://``:
      ``\href{target}{description}``.
    * Any other target with a description: ``\href{}{description}``.
    * Any other target without a description: ``\href{}{target}``.
    """
    def f(txt):
        # partition(' ') yields the same target/description split as
        # split(' ') followed by re-joining the tail with single spaces.
        lnk, has_desc, rest = txt.partition(' ')
        desc = apply_rules(rest, rules)
        # Accept secure URLs as well; previously only "http://" kept its
        # target and "https://" links were emitted with an empty \href.
        if lnk.startswith(('http://', 'https://')):
            return r'\href{' + lnk + r'}{' + desc + r'}'
        if has_desc:
            return r'\href{}{' + desc + r'}'
        return r'\href{}{' + lnk + r'}'
    return f
# Some rules can be used inside some other rules (backticks in section names)
# Every rule is [begin_tag, end_tag, formatter]; the formatter receives the
# text found between the two tags and returns its replacement.

# Passed to process_link, which applies them to link descriptions. The end
# tag is empty, so only the underscore itself is replaced.
link_rules = [
    ['_', '', constant(r'\_')],
]
# Applied inside section and chapter titles (see the '==' and '=' rules below).
section_rules = [
    ['`', '`', encase_escape_underscore(r'\texttt{', r'}')],
]
# Applied to each line of a bullet list by process_list.
item_rules = [
    ['`', '`', encase(r'\verb|', r'|')],
    ['[', ']', process_link(link_rules)],
]
# Main rules for Latex formatting
# Order matters: when two begin tags match at the same position, match_rules
# keeps the rule listed first, so '==' has to come before '='.
rules = [
    ['{{{', '}}}', encase(r'\begin{lstlisting}[language=c++]', r'\end{lstlisting}')],
    ['[', ']', process_link(link_rules)],
    [' *', '\n\n', process_list(item_rules)],
    ['"', '"', encase("``", "''")],
    ['`', '`', encase(r'\verb|', r'|')],
    ['*', '*', encase(r'\emph{', r'}')],
    ['_', '_', encase(r'\emph{', r'}')],
    ['==', '==', encase_with_rules(r'\section{', r'}', section_rules, True)],
    ['=', '=', encase_with_rules(r'\chapter{', r'}', section_rules, True)],
    # Special case: this exact phrase is rewritten as inline math.
    ['(e.g. f(x) -> y and f(x,y) -> ', 'z)', constant(r'(e.g. $f(x)\to y$ and $f(x,y)\to z$)')],
]
def match_rules(txt, rules):
    """Locate the rule whose begin tag occurs earliest in ``txt``.

    Returns a ``(rule, position)`` pair. If several begin tags share the
    earliest position, the rule listed first wins. When no begin tag occurs
    in ``txt`` the pair is ``(None, 10e100)``.
    """
    best_rule = None
    best_pos = 10e100  # sentinel larger than any real string index
    for candidate in rules:
        begin_tag, _end_tag, _func = candidate
        pos = txt.find(begin_tag)
        if pos < 0 or pos >= best_pos:
            continue
        best_rule, best_pos = candidate, pos
    return (best_rule, best_pos)
def apply_rules(txt, rules):
    """Apply a set of rules to ``txt`` and return the transformed text.

    The text is scanned left to right. At each step ``match_rules`` picks the
    rule whose begin tag occurs first; the text between that begin tag and
    the next occurrence of the rule's end tag is handed to the rule's
    function, and the begin tag, inner text and end tag are replaced by the
    function's result. Text outside any rule is copied unchanged.

    Begin tags are expected to be non-empty.

    Exits the program via ``sys.exit`` when a begin tag has no end tag after
    it. The position in the message is relative to the text not yet
    processed.
    """
    pieces = []
    remaining = txt
    # Iterate rather than recurse once per match, so documents with many
    # matches cannot hit the interpreter's recursion limit.
    while True:
        matching_rule, first_begin_loc = match_rules(remaining, rules)
        if matching_rule is None:
            pieces.append(remaining)
            return ''.join(pieces)
        begin_tag, end_tag, func = matching_rule
        # Look for the end tag only after the whole begin tag. Starting one
        # character after the begin position let the end tag match inside a
        # multi-character begin tag.
        inner_start = first_begin_loc + len(begin_tag)
        end_loc = remaining.find(end_tag, inner_start)
        if end_loc == -1:
            sys.exit('Could not find end tag {0} after position {1}'.format(end_tag, inner_start))
        # Copy characters up until begin tag
        pieces.append(remaining[:first_begin_loc])
        # Then have output of rule function on inner text
        pieces.append(func(remaining[inner_start:end_loc]))
        # Continue with whatever follows the end tag
        remaining = remaining[end_loc + len(end_tag):]
def split_sections(contents):
    """Break the file contents into titled sections, preserving order.

    Returns a list of ``(title, lines)`` pairs. A line starting with ``=``
    opens a new section; its title is the line with the surrounding ``=``
    markup removed, and the heading line itself is kept as the first line
    of that section. Lines before the first heading are collected under the
    empty title.
    """
    sections = []
    title = ''
    lines = []
    for line in contents.split('\n'):
        if not line.startswith('='):
            lines.append(line)
            continue
        # A heading closes the section gathered so far and opens a new one.
        sections.append((title, lines))
        title = sub(r'^\=+ (.*) \=+', r'\1', line)
        lines = [line]
    sections.append((title, lines))
    return sections
def filter_sections(splitinput, removelst):
    """Drop the sections named in ``removelst`` and join the rest.

    ``splitinput`` is a sequence of ``(title, lines)`` pairs. The lines of
    every section whose title is not in ``removelst`` are concatenated in
    order and returned as a single newline-joined string.
    """
    kept_lines = [
        line
        for title, body in splitinput
        if title not in removelst
        for line in body
    ]
    return '\n'.join(kept_lines)
def main():
    """Convert the wiki file named by ``sys.argv[1]`` into ``sys.argv[2]``.

    The input is read as UTF-8, its first three lines are dropped, the
    'Introduction', 'Prerequisites' and 'Simple Example' sections are
    removed, and the remainder is converted with the main ``rules`` table
    and written to the output file as UTF-8.

    Returns 0 on success, or 2 (after printing a usage line to stderr) when
    fewer than two file arguments were given.
    """
    if len(sys.argv) < 3:
        sys.stderr.write('usage: {0} INPUT.wiki OUTPUT.tex\n'.format(sys.argv[0]))
        return 2
    # The context manager closes the input even if reading fails.
    with codecs.open(sys.argv[1], encoding='utf-8') as infile:
        contents = infile.read()
    # Remove first three lines
    contents = '\n'.join(contents.split('\n')[3:])
    # Split sections and filter out some of them
    sections = split_sections(contents)
    contents = filter_sections(sections, ['Introduction', 'Prerequisites', 'Simple Example'])
    # Convert to latex format
    contents = apply_rules(contents, rules)
    # Open the output only after conversion succeeded, so a failed run does
    # not truncate an existing output file.
    with codecs.open(sys.argv[2], mode='w', encoding='utf-8') as outfile:
        outfile.write(contents)
    return 0
# Run the converter only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())