import re

from .formats import BaseFormat, placeholder
from .tokens import *

# (character, LaTeX replacement) pairs used to escape plain text.
# These are applied one after another, in this order, with str.replace,
# so make sure they don't interfere with each other: a replacement must
# not introduce a character that a later entry would escape again.
QUOTE_MAP = (
    ('\\',r'\textbackslash '), # This should almost certainly be first
    ('{',r'\{'),
    ('}',r'\}'),
    # Anything with {}-delimited arguments goes after here (the braces
    # in those replacements must not be escaped by the two entries above)
    ('%',r'\%'),
    ('&',r'\&'),
    ('$',r'\$'),
    ('#',r'\#'),
    ('_',r'\_'),
    ('~',r'\~{}'),
    ('^',r'\^{}'),
    ('<',r'$<$'),
    ('>',r'$>$'),
    ('[',r'{[}'), # Needed, say, right after \item
    (']',r'{]}'), # Needed /in/ an optional argument, for example \item[{]}]
    ("'",r'\textquotesingle{}'), # To keep LaTeX from turning it into a smart quote
    ('"',r'\textquotedbl{}'),
    ('\n',r' '), # To keep \n\n from being a paragraph break
    )

# (compiled pattern, replacement) pairs applied with pattern.sub() after
# QUOTE_MAP.  Each wraps a run of characters in a font-switching command;
# \symbol and \japanese are presumably defined by the LaTeX preamble,
# which is not visible from this file.
# NOTE: in a Python 2 ur'' literal the \uXXXX escapes are still expanded,
# so these character classes contain the actual code points.
FONT_SUBSTS = [
#    (re.compile(ur'([\u2700-\u27BF]+)'), r'{\\dingbat \1}'),
    (re.compile(ur'([\u2200-\u22FF\u25A0-\u27BF]+)'), r'{\\symbol \1}'),
    (re.compile(ur'(['
                ur'\u2E80-\u33FF'
                ur'\u3400-\u4DB5'
                ur'\u4E00-\u9FCC'
                ur'\uA000-\uA4FF'
                ur'\uA960-\uA97F'
                ur'\uD7B0-\uD7FF'
                ur'\uF900-\uFAD9'
                ur'\uFE10-\uFE6F'
                ur'\uFF00-\uFFEF]+)'), r'{\\japanese \1}'),
    ]

# (first character, last character, font command name) for characters
# outside the Basic Multilingual Plane.  Ranges are inclusive and are
# searched in order; the first range containing the character wins.
_NON_BMP = (
    (u'\U00010000', u'\U0001007F', 'japanese'),
    (u'\U0001B000', u'\U0001B0FF', 'japanese'),
    (u'\U0001D100', u'\U0001D1FF', 'japanese'),
    (u'\U0001D360', u'\U0001D37F', 'japanese'),
    # NOTE(review): starts at the same code point as the first entry, so
    # only U+10080..U+100F7 can reach this one -- confirm the intended
    # start of this range.
    (u'\U00010000', u'\U000100F7', 'japanese'),
    (u'\U0001F000', u'\U0001F02F', 'japanese'),
    (u'\U0001F100', u'\U0001F7FF', 'japanese'),
    (u'\U00020000', u'\U0002B81F', 'japaneseext'),
    (u'\U0002F800', u'\U0002FA1F', 'japanese'),
    )

# We can't match these in a clean way, because standard Python is bad
# at matching non-BMP Unicode.  (We do it by explicitly matching
# surrogate pairs on OS X and by matching the characters properly on
# scripts.)  (Matching either form would be fine everywhere, except that
# the proper character range is an invalid regexp on the Mac.)
def sub_non_bmp(match): c = match.group(1) for start, end, font in _NON_BMP: assert len(c) == len(start) if start <= c and c <= end: return ur'{\%s %s}' % (font, c) return c try: FONT_SUBSTS.append((re.compile(ur'([\U00010000-\U0010FFFF])'), sub_non_bmp)) except re.error: FONT_SUBSTS.append((re.compile(ur'([\uD800-\uDFFF][\uD800-\uDFFF])'), sub_non_bmp)) CMD_MAP = {SUPERSCRIPT: 'textsuperscript', SUBSCRIPT: 'textsubscript', UNDERLINE: 'uline', STRIKE: 'sout', FOOTNOTE: 'footnote', } BLOCK_MAP = {BOLD: 'bfseries', ITALIC: 'em', MONOSPACE: 'ttfamily', } ENV_MAP = {CENTER: 'thincenter', RIGHT: 'thinright', } LATEX_HEADINGS = { 1: r'chapter', 2: r'section', 3: r'subsection', 4: r'subsubsection', 5: r'subsubsubsection', 6: r'paragraph' } LATEX_SIZES = { -4: 'tiny', -3: 'scriptsize', -2: 'footnotesize', -1: 'small', 0: 'normalsize', 1: 'large', 2: 'Large', 3: 'LARGE', 4: 'huge', 5: 'Huge' } LISTMAP = {ORDERED: 'enumerate', UNORDERED: 'itemize', BLOCKQUOTE: 'bazquote', } PX_TO_PT = 12. / 16 def url_to_path(url): assert '://' not in url assert url.startswith('/') path = '.' 
+ url.replace('%5E', '--').replace('%20', '_').replace('%2C', ':').replace('.', '-') assert '%' not in path, path return path class LaTeXFormat(BaseFormat): def __init__(self): self.verbatim = False self.table_row_start = None self.row_length = 0 self.longest_row = 0 def escape(self, text): if self.verbatim: return text else: for bad,repl in QUOTE_MAP: text = text.replace(bad,repl) # Some characters need special font handling for regexp,subst in FONT_SUBSTS: text = regexp.sub(subst, text) return text def text(self, text): yield self.escape(text) def start(self, t, arg=None): if t in CMD_MAP: yield ur'\%s{' % CMD_MAP[t] elif t in BLOCK_MAP: yield ur'{\%s ' % BLOCK_MAP[t] elif t == SIZE: yield ur'{\%s ' % LATEX_SIZES[arg] elif t in ENV_MAP: yield u'\\begin{%s}\n' % ENV_MAP[t] elif t in LISTMAP: yield u'\\begin{%s}\n' % LISTMAP[t] elif t in (ORDERED_ITEM, UNORDERED_ITEM): yield ur'\item ' elif t == BLOCKQUOTE_LINE: yield '' elif t == PARAGRAPH: yield '' elif t == LINK: yield ur'\href{%s}{' % (self.escape(arg['url'])) #if arg['style'] == 'external': # yield ur'\nolinkurl{' elif t == HEADING: yield ur'\%s{' % LATEX_HEADINGS[arg] elif t == CODEBLOCK: self.verbatim = True yield u'\\begin{verbatim}\n' elif t == NOINDENT: yield u'\n\n\\noindent\n' elif t == ERROR: yield r'\textbf{\emph{\color{red}' elif t == TABLE: tabulartype = '' if arg: if 'mode' in arg and arg['mode'] == 'equal': tabulartype = 'x' self.longest_row = 0 yield '\\starttable%s\n' % tabulartype if not arg or 'border' not in arg or arg['border']: yield '\\hline\n' self.tableborder = True else: self.tableborder = False elif t == TABLE_ROW: self.table_row_start = True self.row_length = 0 yield '' elif t == TABLE_CELL: self.row_length += 1 if self.table_row_start: self.table_row_start = False yield '' else: yield r'&' elif t == TABLE_HEADING: self.row_length += 1 if self.table_row_start: self.table_row_start = False yield r'{\bfseries ' else: yield r'& {\bfseries ' elif t == IMAGE: # This only works because of 
the magic image fetching logic # that bazki.latex does, which also uses url_to_path(). path = url_to_path(arg['url']) params = [] if 'height' in arg: if hasattr(arg['height'], 'to_str'): height = arg['height'].to_str() else: height = str(arg['height'] * PX_TO_PT) + 'pt' params.append('height=%s' % (height)) if 'width' in arg: if hasattr(arg['width'], 'to_str'): width = arg['width'].to_str() else: width = str(arg['width'] * PX_TO_PT) + 'pt' params.append('width=%s' % (width)) if not arg.get('force', False): params.append('keepaspectratio') if len(params) > 0: paramstr = '[%s]' % ','.join(params) else: paramstr = '' yield '\n\\includegraphics%s{%s}\n\ignore{' % (paramstr, path) else: assert False, t def end(self, t, arg=None): if t in CMD_MAP or t in BLOCK_MAP or t in (SIZE, HEADING): yield u'}' elif t in ENV_MAP: yield u'\n\\end{%s}' % ENV_MAP[t] elif t in LISTMAP: yield u'\\end{%s}\n' % LISTMAP[t] elif t in (ORDERED_ITEM, UNORDERED_ITEM, BLOCKQUOTE_LINE): yield '\n' elif t == PARAGRAPH: yield '' elif t == LINK: #if arg['style'] == 'external': # yield ur'}}' #else: yield '}' elif t == CODEBLOCK: self.verbatim = False yield u'\n\end{verbatim}' elif t == NOINDENT: yield '' elif t == ERROR: yield '}}' elif t == TABLE: tabulartype = '' achar = 'l' bchar = '|' specs = [] if arg: if 'align' in arg: achar = arg['align'][0].lower() if 'border' in arg and not arg['border']: bchar = '' if 'specs' in arg: specs = arg['specs'] if 'mode' in arg and arg['mode'] == 'equal': tablulartype = 'x' achar = achar.upper() while len(specs) < self.longest_row: specs.append(achar) yield '\\endtable%s{%s%s%s}\n' % (tabulartype, bchar, bchar.join(specs), bchar) elif t == TABLE_ROW: if self.row_length > self.longest_row: self.longest_row = self.row_length yield u'\\\\\n' if self.tableborder: yield '\\hline\n' elif t == TABLE_CELL: yield '' elif t == TABLE_HEADING: yield '}' elif t == IMAGE: yield '}' else: assert False, t def entity(self, t, arg=None): if t == HRULE: yield ur'\hrule{}' elif t 
== LINEBREAK: yield u'\\\\' elif t == ENV_BREAK: yield u'\n\n' elif t == NOINDENT: yield u'\\noindent{}' elif t == ERROR: for s in self.start(ERROR): yield s yield arg for e in self.end(ERROR): yield e elif t == REF: yield placeholder(arg) else: assert False, t