from pygments.lexer import RegexLexer, include
from pygments.token import *


class CreoleLexer(RegexLexer):
    """Regex-based Pygments lexer for Creole-style wiki markup.

    Besides inline/block wiki markup, the rules recognise ``<<...>>``
    macros, TeX-like ``\\name{...}`` / ``\\name[...]`` macros and a set of
    typographic shortcuts (dashes, quotes, arrows, ellipsis).

    Most token types are ad-hoc children of ``Token`` (for example
    ``Token.Inline.Bold``); they are created on attribute access and are
    meant for a consumer that understands them rather than for standard
    Pygments styles.

    Rule order inside every state is significant: the first matching
    pattern wins, so longer or more specific patterns precede shorter ones.
    """

    name = 'Creole'

    tokens = {
        # Fallback rules shared by nearly every state: escapes,
        # typographic shortcuts, whitespace, and finally plain words and
        # single non-word characters.
        'global': [
            (r'~[a-z]+://', Token.EscapedImplicitInfix),
            (r'~.', Token.Escape),
            (r'\n', Token.NL),
            (r'\-\-\-', Token.EmDash),
            (r'\-\-', Token.EnDash),
            (r'\`\`', Token.OpenQuote),
            (r"\'\'", Token.CloseQuote),
            (r'\`', Token.OpenSingleQuote),
            (r"\'", Token.SingleQuote),
            (r'\"', Token.DoubleQuote),
            (r'\.\.\.', Token.Ellipsis),
            (r'\<\=\>', Token.DoubleBoth),
            (r'\<\-\>', Token.SingleBoth),
            (r'\=\>', Token.DoubleRight),
            (r'\-\>', Token.SingleRight),
            (r'\<\=', Token.DoubleLeft),
            (r'\<\-', Token.SingleLeft),
            (r'[ \t]+', Token.SP),
            (r'\w+', Text),
            (r'\W', Punctuation),
        ],

        # Markup that may appear in running text: headings, blockquote
        # markers, inline styles, inline code, macros, images and links.
        'out_mu': [
            (r'^ *[=]+', Token.Heading),
            (r'^[:>]+', Token.Blockquote),
            (r'\\\\', Token.LineBreak),
            (r'\*\*', Token.Inline.Bold),
            (r'\/\/', Token.Inline.Italic),
            (r'\#\#', Token.Inline.Monospace),
            (r'\^\^', Token.Inline.Superscript),
            (r'\,\,', Token.Inline.Subscript),
            (r'\_\_', Token.Inline.Underline),
            (r'\{\{\{', Token.Open.Code, 'pre'),
            # A closing macro tag at the start of a line swallows the
            # preceding newline into the token.
            (r'\n\<\<\/', Token.Macro.Start.Close, 'macro'),
            (r'\<\<\/', Token.Macro.Start.Close, 'macro'),
            (r'\<\<', Token.Macro.Start.Open, 'macro'),
            (r'\\\W\{', Token.Macro.TeX.Open, 'texmacro'),
            (r'\\\w+(?:\.\w+)*\{', Token.Macro.TeX.Open, 'texmacro'),
            # "[[" starts a link, so an optional argument must not be
            # followed by a second "[".
            (r'\\\w+(?:\.\w+)*\[(?!\[)', Token.Macro.TeX.OptArgOpen,
             'texargmacro'),
            # The variant with a trailing space comes first so the space
            # becomes part of the opening token.
            (r'\{\\\w+(?:\.\w+)* ', Token.Macro.TeX.ContentOpen, 'texmacro'),
            (r'\{\\\w+(?:\.\w+)*', Token.Macro.TeX.ContentOpen, 'texmacro'),
            (r'\\[^\w\n]\w', Token.Macro.TeX.SingleChar),
            (r'\\\w+(?:\.\w+)*', Token.Macro.TeX.Entity),
            (r'\{\{', Token.Start.Image, 'img'),
            (r'\[\[', Token.Start.Link, 'link'),
            (r'://', Token.LinkImplicitInfix, 'implicitlink'),
        ],

        # Inline markup plus the shared fallbacks; included by nested states.
        'out': [
            include('out_mu'),
            include('global'),
        ],

        # Top-level state: block constructs that only start at the
        # beginning of a line, then inline markup, then list openers.
        'root': [
            (r'^\{\{\{\n', Token.Open.Codeblock, 'codeblock'),
            (r'^\|\=', Token.Table.Heading, 'table'),
            (r'^\|', Token.Table.Cell, 'table'),
            (r'^\-\-\-\-', Token.Hrule),
            (r'\n[ ]*\n', Token.ParaBreak),
            include('out_mu'),
            # A single "*" / "#" opens a list; the doubled forms were
            # already consumed above as bold / monospace markers.
            (r'^ *[*](?![*])', Token.List.Unordered, 'list'),
            (r'^ *[#](?![#])', Token.List.Ordered, 'list'),
            include('global'),
        ],

        # Inside a list: any run of "*" / "#" at line start is a bullet.
        'list': [
            (r'^ *[*]+', Token.List.Unordered),
            (r'^ *[#]+', Token.List.Ordered),
            # 'list' is only ever entered from 'root', so popping returns
            # there. Naming 'root' here would push a fresh state instead
            # and grow the state stack by one entry for every list.
            (r'\n\n', Token.ParaBreak, '#pop'),
            include('out'),
        ],

        # Inside a table: cell separators, with a row-terminating
        # separator folded into the newline token.
        'table': [
            (r' *(?:\||\|\=)\n', Token.NL),
            (r'\|\=', Token.Table.Heading),
            (r'\|', Token.Table.Cell),
            # Same reasoning as in 'list': pop back to 'root' rather than
            # pushing another copy of it.
            (r'\n\n', Token.ParaBreak, '#pop'),
            include('out'),
        ],

        # {{target|caption}}: target part, up to "|" or the closing "}}".
        'img': [
            (r'\}\}', Token.End.Image, '#pop'),
            (r'\|', Token.LinkPipe, 'imgout'),
            (r'[^}|]+', Text),
            (r'\}', Text),
        ],

        # Caption part of an image; closing pops both 'imgout' and 'img'.
        'imgout': [
            (r'\}\}', Token.End.Image, '#pop:2'),
            include('out'),
        ],

        # [[target|label]]: target part, up to "|" or the closing "]]".
        'link': [
            (r'\]\]', Token.End.Link, '#pop'),
            (r'\|', Token.LinkPipe, 'linkout'),
            (r'[^]|]+', Text),
            (r'\]', Text),
        ],

        # Label part of a link; closing pops both 'linkout' and 'link'.
        'linkout': [
            (r'\]\]', Token.End.Link, '#pop:2'),
            include('out'),
        ],

        # Remainder of a bare URL after "://". The two lookahead rules
        # match the empty string and use a callback that yields no tokens,
        # so they only pop the state without consuming the terminator.
        'implicitlink': [
            (r'(?=[][ \t\n<>()\\"\'])', lambda lexer, match: [], '#pop'),
            (r'(?=##)', lambda lexer, match: [], '#pop'),
            (r'\w+', Token.ImplicitLinkText),
            (r'\W', Token.ImplicitLinkText),
        ],

        # Body of a <<...>> macro tag up to one of its closing forms.
        'macro': [
            (r' ?\/\>\>', Token.Macro.End.Entity, '#pop'),
            (r' ?\/\>', Token.Macro.End.Broken, '#pop'),
            (r'\>\>', Token.Macro.End.Inline, '#pop'),
            (r'\"', Text, 'macroquote'),
            (r'\w+', Text),
            (r'[^/> \n\w"]', Text),
            (r'[/>]', Text),
            (r'[ \n]', Token.SP),
        ],

        # Double-quoted string inside a macro tag.
        'macroquote': [
            (r'\"', Text, '#pop'),
            (r'[^"]', Text),
        ],

        # Optional [...] arguments of a TeX-like macro.
        'texargmacro': [
            (r'\]\[(?!\[)', Token.Macro.TeX.OptArgBreak),
            (r'\]\{', Token.Macro.TeX.EndOptArgs, 'texafterargmacro'),
            (r'\]', Token.Macro.TeX.OptArgClose, '#pop'),
            (r'\[', Text, 'texargmacropushed'),
            (r'[^][]+', Text),
        ],

        # Nested brackets inside an optional argument; '#push' tracks depth.
        'texargmacropushed': [
            (r'\]', Text, '#pop'),
            (r'\[', Text, '#push'),
            (r'[^][]+', Text),
        ],

        # {...} arguments that follow optional arguments; the closing
        # brace pops this state and the enclosing 'texargmacro'.
        'texafterargmacro': [
            (r'\}\{', Token.Macro.TeX.ArgBreak),
            (r'\}', Token.Macro.TeX.Close, '#pop:2'),
            include('out'),
        ],

        # {...} arguments of a TeX-like macro without optional arguments.
        'texmacro': [
            (r'\}\{', Token.Macro.TeX.ArgBreak),
            (r'\}', Token.Macro.TeX.Close, '#pop'),
            include('out'),
        ],

        # Block-level {{{ ... }}}; only a "}}}" right after a newline
        # closes it.
        'codeblock': [
            (r'\n\}\}\}', Token.Close.Codeblock, '#pop'),
            (r'[^}\n]+', Text),
            (r'[}\n]', Text),
        ],

        # Inline {{{ ... }}} code span.
        'pre': [
            (r'\}\}\}', Token.Close.Code, '#pop'),
            (r'[^}]+', Text),
            (r'[}]', Text),
        ],
    }