from __future__ import with_statement
from __future__ import absolute_import

import sys, os, pwd, tempfile, shutil
import warnings
import errno
import threading, copy

from bazjunk.path import makedirs

from . import wiki, model, flavors, custom, db, dependencies
from .translators import Intermed, TRANSLATORS, ConversionFailedException
from .filters import FILTERS
from .benchmark import benchmarking

# Performs comparably to jsonlib and marshal in my testing.
from cPickle import dump, load

SUFFIX = '.pickle'


def cache_dir():
    return "/tmp/%s.%s.bazki/cache" % (pwd.getpwuid(os.getuid())[0],
                                       custom.APP_NAME)


cache_state = threading.local()

# These files store data of the form:
# {'revision': 17,
#  'dependencies': [(ename, propname, propval_id, version), ...],
#  'formats': {'txt': {'value': ..., 'metadata': ..., 'dependencies': ...},
#              '.tex': {...}, ...}}
#
# _metadata files' formats dictionary is more like metadata, of the
# form: {'final_map': {...}}
# TODO(xavid): storing dependencies this way is redundant


def cache_file_name(ename, prop_name):
    if ename is None:
        assert prop_name is None
        return os.path.join(cache_dir(), '_' + SUFFIX)
    else:
        return os.path.join(cache_dir(), ename, (prop_name or '_') + SUFFIX)


def cache_data_for(ename, prop_name):
    pvt = (ename, prop_name)
    # See if we have something prevalidated
    if pvt in cache_state.validated:
        return cache_state.validated[pvt]
    # TODO(xavid): check cache_state.transient
    try:
        with open(cache_file_name(ename, prop_name)) as fil:
            ret = load(fil)
    except IOError:
        return None
    else:
        #with benchmarking('validating cache entry for %s.%s'
        #                  % (ename, prop_name)):
        if True:
            # SVN revision short-circuit.  None means no revision
            # shortcutting possible; i.e., no version control hook.
            rev = db.get_revision()
            if rev is not None and ret['revision'] == rev:
                cache_state.validated[pvt] = ret
                return ret
            for dep in ret['dependencies']:
                assert len(dep) == 4, dep
                dename, dpname, id, version = dep
                try:
                    delement = model.Element.get(dename)
                except model.NoResultFound:
                    if dpname != '__exists':
                        return None
                else:
                    if dpname == '__version':
                        if delement.id != id or delement.version != version:
                            return None
                    elif dpname == '__exists':
                        return None
                    else:
                        try:
                            dpropval = delement[dpname]
                        except KeyError:
                            # None, None is the NoPropvalDep
                            if id is not None or version is not None:
                                return None
                        else:
                            if (dpropval.id != id
                                or dpropval.version != version):
                                return None
            # Put in transient so it'll get rewritten with the new revision.
            pvt = (ename, prop_name)
            if pvt in cache_state.transient:
                for f in ret['formats']:
                    if f not in cache_state.transient[pvt]:
                        cache_state.transient[pvt][f] = ret['formats'][f]
            else:
                cache_state.transient[pvt] = ret['formats']
            return ret


def cached_formats(ename, prop_name, cache_data):
    if cache_data is not None:
        ret = set(cache_data['formats'].keys())
    else:
        ret = set()
    pvt = (ename, prop_name)
    if pvt in cache_state.transient:
        ret.update(cache_state.transient[pvt].keys())
    return ret


def get_from_cache(ename, prop_name, format, cache_data=None):
    pvt = (ename, prop_name)
    # TODO(xavid): move into cache_data_for()
    if (pvt in cache_state.transient
        and format in cache_state.transient[pvt]):
        dct = copy.deepcopy(cache_state.transient[pvt][format])
    else:
        if cache_data is None:
            cache_data = cache_data_for(ename, prop_name)
        if cache_data is None or format not in cache_data['formats']:
            return None
        else:
            dct = cache_data['formats'][format]
    return dct


def cache_propval(ename, prop_name, format, value, deps, metadata,
                  cache_data=None):
    for d in deps:
        assert len(d) == 4, d
    if format in UNICODE_FORMATS:
        assert isinstance(value, unicode), repr(value)
    elif prop_name != METADATA:
        assert isinstance(value, str), repr(value)
    pvt = (ename, prop_name)
    if pvt not in cache_state.transient:
        if cache_data is None:
            cache_data = cache_data_for(ename, prop_name)
        if cache_data is None:
            cache_state.transient[pvt] = {}
        else:
            cache_state.transient[pvt] = dict(cache_data['formats'])
    cache_state.transient[pvt][format] = dict(
        value=value, metadata=metadata, dependencies=deps)


METADATA = '_metadata'


def get_element_metadata(element, key):
    cache_entry = get_from_cache(element.ename, METADATA, key)
    if cache_entry is None:
        with benchmarking('calculating %s metadata for %s'
                          % (key, element.ename)):
            value, deps = element.calculate_metadata(key)
        cache_propval(element.ename, METADATA, key, value, deps, {})
    else:
        value = cache_entry['value']
    return value


def invalidate_cache():
    cache_state.transient = {}
    cache_state.validated = {}
    try:
        shutil.rmtree(cache_dir())
    except OSError, e:
        if e.errno == errno.ENOENT:
            pass
        else:
            raise


def flatten_filters(filters):
    """Flatten a filters dict into a stable, sorted '|'-delimited suffix
    for cache format keys; 'let' bindings are split into element ('elet')
    and plain-string ('mlet') parts.  For example,
    {'let': {'x': page_element, 'y': 'foo'}} becomes
    '|elet=x=Page|mlet=y=foo' when page_element.ename == 'Page'.
    """
    # metadata is not a cachable filter, somehow
    assert 'metadata' not in filters
    mfilters = dict(filters)
    if 'let' in mfilters:
        assert 'elet' not in mfilters
        assert 'mlet' not in mfilters
        elet = {}
        mlet = {}
        for k, v in mfilters['let'].items():
            if isinstance(v, model.Element):
                elet[k] = v
            else:
                assert isinstance(v, str)
                assert ';' not in v
                assert '|' not in v
                mlet[k] = v
        del mfilters['let']
        if len(elet) > 0:
            mfilters['elet'] = ';'.join("%s=%s" % (k, v.ename)
                                        for k, v in elet.items())
        if len(mlet) > 0:
            mfilters['mlet'] = ';'.join("%s=%s" % (k, v)
                                        for k, v in mlet.items())
    assert all(isinstance(v, basestring) for v in mfilters.values()), mfilters
    if len(mfilters) > 0:
        return '|' + u'|'.join('%s=%s' % (k, mfilters[k])
                               for k in sorted(mfilters)).encode('utf-8')
    else:
        return ''


# These extensions require proper image metadata for resulting forms
# to be produced properly, and thus can't be cached if they have images.
# TODO(xavid): either cache image metadata or do something more clever.
IMAGE_METADATA_FORMATS = ('tex', '.tex')

# Formats where cached values should be treated as utf-8-encoded Unicode,
# not raw bytes in a str.
UNICODE_FORMATS = ('txt', 'html', 'tex')


def apply_filters(propval, rendering, rendering_ext, deps, metadata, flist,
                  cache_tag=None):
    if rendering_ext in UNICODE_FORMATS:
        assert isinstance(rendering, unicode), repr(rendering)
    else:
        assert isinstance(rendering, str), repr(rendering)
    if propval is not None:
        ename = propval.element.ename
        prop_name = propval.prop.name
    else:
        ename = prop_name = None
    im = Intermed(metadata)
    im.setData(rendering, rendering_ext)
    im.addDeps(deps)
    for f in flist:
        # Only hold on to data that's already in memory, for efficiency.
        # (The only case we care about, html => .html, will be in memory.)
        # We're not cachable if we have a dependency on DISCORDIA
        # or we have filters we haven't dealt with or image metadata
        # we need.
        if (cache_tag is not None and not im.isPath()
            and dependencies.DISCORDIA not in im.getDeps()
            and len(im.metadata().get('filters', {})) == 0
            and (im.getExtension() not in IMAGE_METADATA_FORMATS
                 or len(im.metadata().get('images', [])) == 0)):
            oldext = im.getExtension()
            olddeps = set(im.getDeps())
            olddata = im.asData()
            oldmetadata = dict(im.metadata())
        else:
            olddata = None
        f(im)
        # Cache it if we just developed a dependency on DISCORDIA and
        # we had some prior cacheable data.
        if (olddata is not None
            and dependencies.DISCORDIA in im.getDeps()):
            cache_propval(ename, prop_name, oldext + cache_tag, olddata,
                          olddeps, oldmetadata)
    filters = im.metadata().get('filters', {})
    for filt in list(filters):
        FILTERS[im.getExtension()][filt](im, filters[filt])
    # im now holds the return data and extension; let's see
    # if we can cache it.
    value = im.asData()
    ext = im.getExtension()
    deps = im.getDeps()
    if propval is not None:
        if not flavors.FLAVORS[propval.prop.flavor].binary:
            if ext in UNICODE_FORMATS:
                assert isinstance(value, unicode), [repr(value),
                                                    propval.prop.name]
            else:
                assert isinstance(value, str), [repr(value),
                                                propval.prop.name]
    # Do dependencies
    if cache_tag is not None:
        if (dependencies.DISCORDIA not in deps
            and (ext not in IMAGE_METADATA_FORMATS
                 or len(im.metadata().get('images', [])) == 0)):
            cache_propval(ename, prop_name, ext + cache_tag, value, deps,
                          im.metadata())
    else:
        assert ext != (model.TXT,)
    return im


def cached(ename, prop_name, format):
    entry = get_from_cache(ename, prop_name, format=format)
    if entry is None:
        return None
    else:
        return entry['value']


def convert_any(ename, prop_name, dests, filters=None, reentrant=False,
                method='convert', offset=0):
    # filters is mutated below (del/pop), so default to None rather than
    # sharing a mutable default dict across calls.
    if filters is None:
        filters = {}
    #with benchmarking('%sing %s.%s as %s'
    #                  % (method, ename, prop_name,
    #                     dests),
    #                  offset=offset + 1):
    if True:
        explicit_metadata = {}
        if filters:
            # metadata is defined to not affect anything cacheable, somehow.
            if 'metadata' in filters:
                explicit_metadata = filters['metadata']
                del filters['metadata']
            flat = flatten_filters(filters)
        else:
            flat = ''
        cache_data = cache_data_for(ename, prop_name)
        cached = cached_formats(ename, prop_name, cache_data)
        for f in dests:
            if f in cached:
                ce = get_from_cache(ename, prop_name, f,
                                    cache_data=cache_data)
                im = Intermed(ce['metadata'])
                im.setData(ce['value'], f)
                return im
        #print >>sys.stderr, '%s not in cache data for %s.%s' % (dests, ename,
        #                                                        prop_name)
        propval = model.Element.get(ename)[prop_name]
        flav = flavors.FLAVORS[propval.prop.flavor]
        exts = flav.getExtensions(propval)
        new = dict((d, ([], d)) for d in dests)
        tried = set()
        while len(new) > 0:
            tried.update(new.keys())
            for n in new:
                if n in exts or n + flat in cached:
                    flist, dest = new[n]
                    let = filters.pop('let', {})
                    if n + flat in cached:
                        ce = get_from_cache(ename, prop_name, n + flat,
                                            cache_data=cache_data)
                        assert ce is not None, (cached, n + flat)
                        value = ce['value']
                        deps = ce['dependencies']
                        metadata = explicit_metadata
                    else:
                        value, deps, metadata = wiki.evaluate(
                            propval, n, let=let, reentrant=reentrant)
                        metadata.update(explicit_metadata)
                    metadata['filters'] = filters
                    metadata['let'] = let
                    metadata['element'] = ename
                    im = apply_filters(propval, value, n, deps, metadata,
                                       flist, flat)
                    return im
            newnew = {}
            for n in new:
                flist, dest = new[n]
                if n in TRANSLATORS:
                    for t in TRANSLATORS[n]:
                        if t not in tried:
                            newnew[t] = ([TRANSLATORS[n][t]] + flist, dest)
            new = newnew
        raise ConversionFailedException("Couldn't convert to any of %s!"
                                        % ', '.join(dests))


def convert_markup(markup, dest, global_metadata={}, cacheable_as=None):
    """Convert a string of explicit markup to the given dest format."""
    if cacheable_as is not None:
        ce = get_from_cache(None, None, format=dest + cacheable_as)
        if ce is not None:
            if dest in UNICODE_FORMATS:
                return unicode(ce['value'], 'utf-8')
            else:
                return ce['value']
    new = {dest: []}
    tried = set()
    while len(new) > 0:
        tried.update(new.keys())
        for n in new:
            if n in flavors.FORMATS:
                flist = new[n]
                rendered, deps, metadata = wiki.evaluate(markup, n,
                                                         element=None,
                                                         flavor=flavors.text)
                metadata.update(global_metadata)
                im = apply_filters(None, rendered, n, deps, metadata, flist,
                                   cacheable_as)
                assert im.getExtension() == dest
                return im.asData()
        newnew = {}
        for n in new:
            flist = new[n]
            if n in TRANSLATORS:
                for t in TRANSLATORS[n]:
                    if t not in tried:
                        newnew[t] = [TRANSLATORS[n][t]] + flist
        new = newnew
    raise ConversionFailedException("Couldn't convert markup to %s!" % dest)


def render(element, prop_name, format='txt'):
    return convert_any(element.ename, prop_name, [format]).asData()


def render_raw(ename, prop_name):
    # No assertion for efficiency, but this documents that prop_name is
    # assumed to be of flavor raw, and thus not have external dependencies.
    return convert_any(ename, prop_name, ['txt']).asData()


class cache_hook(object):
    @staticmethod
    def begin():
        cache_hook.clear()

    @staticmethod
    def clear():
        cache_state.transient = {}
        cache_state.validated = {}

    @staticmethod
    def commit():
        for ename, prop_name in cache_state.transient:
            destfile = cache_file_name(ename, prop_name)
            deps = set()
            formats = {}
            pvt = (ename, prop_name)
            for format in cache_state.transient[pvt]:
                formats[format] = dict(cache_state.transient[pvt][format])
                deps.update(formats[format]['dependencies'])
            assert dependencies.DISCORDIA not in deps
            fil = tempfile.NamedTemporaryFile(delete=False)
            dump({'dependencies': deps,
                  'formats': formats,
                  'revision': db.get_revision()}, fil.file)
            fil.close()
            makedirs(os.path.dirname(destfile))
            os.rename(fil.name, destfile)

    @staticmethod
    def abort():
        del cache_state.transient
        del cache_state.validated
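
# Usage sketch (illustrative only; a minimal flow, not part of this
# module's tested surface).  It assumes an application that has already
# configured custom.APP_NAME and the db/model layers, a hypothetical
# import path `bazki.cache` for this module, and an element object
# `page` with a 'body' property; error handling is abbreviated.
#
#     from bazki import cache
#
#     cache.cache_hook.begin()           # reset per-thread cache state
#     try:
#         html = cache.render(page, 'body', format='html')
#         cache.cache_hook.commit()      # flush transient entries to disk
#     except:
#         cache.cache_hook.abort()
#         raise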