import sys, itertools from tokens import * from creole import tokenize debug = False class ParserException(Exception): pass class inner_macro_it: def __init__(self, macroname, it): self.macroname = macroname self.it = it self.closed = False self.macro_stack = [] def __iter__(self): return self def next(self): endmacroname = None t = self.it.next() if t.style == MACRO: if debug: print >>sys.stderr, 'imi', self.macroname, t.op, t.arg, if debug: print >>sys.stderr, self.macro_stack if t.op == END: if t.arg == self.macroname and not self.macro_stack: self.closed = t raise StopIteration elif self.macro_stack[-1] == t.arg: self.macro_stack.pop() elif t.op == START: self.macro_stack.append(t.arg[0]) return t def macro_it(macro_func, macroname, argstr, it): imit = inner_macro_it(macroname, it) mit = macro_func(macroname, argstr, imit) for i in mit: yield i if not imit.closed: for i in imit: pass if not imit.closed: raise ParserException("Unclosed macro <<%s %s>>" % (macroname, argstr)) yield imit.closed for i in it: yield i # Returns default display, {'url': url, 'style': css class, other metadata} def default_link_func(h, sty): return h, {'url': h, 'style': 'external' if '://' in h else 'internal'} class Parser(object): def __init__(self, format, macro_func, link_func=default_link_func): self.format = format self.macro_func = macro_func self.link_func = link_func def parse(self, markup, format=None, link_func=None): assert isinstance(markup, basestring), markup return self.render(tokenize(markup), format=format, link_func=link_func) def iparse(self, markup, format=None, link_func=None): return self.irender(tokenize(markup), format=format, link_func=link_func) def render(self, tokens, format=None, link_func=None): return u''.join(self.irender(tokens, format=format, link_func=link_func)) END_ENTITY_MACRO = Op('END_ENTITY_MACRO') def irender(self, tokens, format=None, link_func=None): if format is None: format = self.format() if link_func is None: link_func = self.link_func it = iter(tokens) env_stack = [] start_depth = 0 macro_env_stack = [] env_closed = False start_stack = [] while True: try: t = it.next() except StopIteration: if debug: print >>sys.stderr, StopIteration, env_stack while env_stack: env, envarg = env_stack.pop() if env is not True: yield format.end(env, envarg) break if debug: print >>sys.stderr, t assert isinstance(t, Token), (repr(it), macroname, repr(t)) sty = t.style is_macro_close = ((t.op == END and sty == MACRO) or t.op is self.END_ENTITY_MACRO) is_nonmacro_close = (t.op == END and sty != MACRO and start_stack and sty == start_stack[-1][0] and start_stack[-1][1] is not None) if t.op == END and sty != MACRO: if debug: print >>sys.stderr, 'nmc?', sty, start_stack can_clear_env = (( #(not macro_env_stack # or (is_macro_close # and len(macro_env_stack) == 1) # or (env_stack and env_stack[-1][0] == PARAGRAPH) # ) and start_depth == 0) or (sty == ENV_BREAK and t.arg is True)) text_when_closed = (env_closed or not start_stack ) and t.op == TEXT and t.arg.strip() if ((sty and t.op != END and sty.env is not False and (sty.env is not None or can_clear_env)) #or (macro_env_stack and is_macro_close) or (env_stack and env_stack[-1][0] is not False and is_nonmacro_close) or (env_stack and text_when_closed)): if is_macro_close: if debug: print >>sys.stderr, 'mac_close', macro_env_stack dest_env, arg = macro_env_stack.pop() elif is_nonmacro_close: if debug: print >>sys.stderr, 'nmclose', env_stack dest_env = False arg = None elif text_when_closed: if debug: print >>sys.stderr, 'text_when_closed', t dest_env = PARAGRAPH arg = None else: dest_env = sty.env arg = t.arg if t.op == END: env_closed = True else: env_closed = False incr_done = False if (env_stack and (can_clear_env or is_nonmacro_close or (dest_env is not True and dest_env.group and env_stack[-1][0] is not True and env_stack[-1][0] is not False and dest_env.group == env_stack[-1][0].group) or is_macro_close)): if dest_env != env_stack[-1][0] and ( not dest_env or dest_env is True or not dest_env.group or env_stack[-1][0] is True or (dest_env is not True and env_stack[-1][0] is not True and dest_env.group != env_stack[-1][0].group)): if debug: print >>sys.stderr, ( "break", env_stack, "to", dest_env, "via", sty, arg, start_depth, can_clear_env) clear = False while env_stack: if env_stack[-1][0] == dest_env: break env, envarg = env_stack.pop() if env is not True: yield format.end(env, envarg) else: clear = True if debug: print >>sys.stderr, 'cleared to', env_stack if sty == ENV_BREAK: # We closed the environment. if debug: print >>sys.stderr, "clear EB=>", clear t.arg = clear elif clear: if debug: print >>sys.stderr, "clear so ENV_BREAK" yield format.entity(ENV_BREAK, True) elif (dest_env and dest_env is not True and dest_env.group and dest_env.group == env_stack[-1][0].group): top_env, top_arg = env_stack[-1] if debug: print >>sys.stderr, "trans", dest_env, if debug: print >>sys.stderr, top_arg, arg if top_arg > arg: for x in xrange(top_arg, arg, -1): env, envarg = env_stack.pop() assert envarg == x, (envarg, x) yield format.end(env, envarg) elif top_arg < arg: for x in xrange(top_arg + 1, arg + 1): env_stack.append([dest_env, x]) yield format.start(dest_env, x) elif top_env != dest_env: env_stack.pop() yield format.end(top_env, top_arg) env_stack.append([dest_env, arg]) yield format.start(dest_env, arg) incr_done = True if sty == ENV_BREAK: if debug: print >>sys.stderr, "oother EB case" else: if sty == ENV_BREAK: if debug: print >>sys.stderr, "other EB case" else: if debug: print >>sys.stderr, "start env", sty, env_stack if not env_stack and sty == ENV_BREAK: t.arg = True # If we had something incompatable, we'd have cleared it out # above. if (env_stack and env_stack[-1][0] is True and dest_env is True and t.op == END): if debug: print >>sys.stderr, "pop True" env_stack.pop() elif (dest_env and not incr_done and (not env_stack or env_stack[-1][0] != dest_env)): if dest_env is not True and dest_env.group: for x in xrange(arg): env_stack.append([dest_env, x + 1]) yield format.start(dest_env, x + 1) else: env_stack.append([dest_env, arg]) if dest_env is not True: yield format.start(dest_env, arg) elif not env_stack and sty != ENV_BREAK and sty != MACRO and t.op != END: if t.op == TEXT and not t.arg.strip(): continue if debug: print >>sys.stderr, "start", PARAGRAPH, "due to", sty yield format.start(PARAGRAPH) env_stack.append([PARAGRAPH, None]) elif sty == ENV_BREAK: if debug: print >>sys.stderr, 'C', sty, env_stack, can_clear_env, macro_env_stack, is_macro_close, start_stack if t.op is self.END_ENTITY_MACRO: continue elif t.op == START: if sty == MACRO: macroname, argstr = t.arg if env_stack and env_stack[-1][0] != PARAGRAPH: macro_env_stack.append(env_stack[-1]) it = macro_it(self.macro_func, macroname, argstr, it) else: if (env_stack and env_stack[-1][0] != PARAGRAPH): saved_env = env_stack env_stack = [[False, None]] else: saved_env = None start_stack.append([sty, saved_env]) start_depth += 1 if sty.link: display, metadata = link_func(t.arg, sty) yield format.start(sty, metadata) else: yield format.start(sty, t.arg) elif t.op == END: if sty == MACRO: pass else: assert start_stack[-1][0] == sty, (sty, start_stack) oldsty, oldenv = start_stack.pop() if oldenv is not None: assert env_stack == [[False, None]], (sty, env_stack) env_stack = oldenv start_depth -= 1 if sty.link: display, metadata = link_func(t.arg, sty) yield format.end(sty, metadata) else: yield format.end(sty, t.arg) elif t.op == ENTITY: if sty == MACRO: macroname, argstr = t.arg if env_stack and env_stack[-1][0] != PARAGRAPH: macro_env_stack.append(env_stack[-1]) it = itertools.chain(self.macro_func(macroname, argstr), [Token(self.END_ENTITY_MACRO)], it) elif sty == ENV_BREAK: # Environment closing handled above; arg may have been set # then. yield format.entity(sty, t.arg) elif sty.link: display, metadata = link_func(t.arg, sty) yield format.start(sty, metadata) yield format.text(display) yield format.end(sty, metadata) else: yield format.entity(sty) elif t.op == TEXT: yield format.text(t.arg) elif t.op == ERROR: yield format.error(t.arg) elif t.op == LITERAL: yield t.arg else: assert False, t if __name__ == '__main__': print [dir(t) for t in Parser().tokenize('This* is a **bold** [[Plan|plan]] at ~** http://plan.com/.')]