"""Build ``dicts.json`` from plain-text word-pair lists.

For every (source language, target language) entry in ``FNAMES`` the
configured file is read line by line, the configured regex is matched
against the start of each line, and the two captured groups are stored as
``{group 1: group 2}``.  The nested result is written to ``dicts.json``
prefixed with ``var dicts=``.
"""
import json
import re

# Source language -> target language -> input file and line regex.
# Each regex must expose two groups: (1) the key word, (2) the value word.
# Raw strings keep ``\s`` and ``\(`` as regex escapes rather than relying on
# Python passing unknown string escapes through unchanged.
FNAMES = {
    "English": {
        "Spanish": {"file": "en_es.txt", "regex": r"([a-z]+)\s+([a-z]+) \("},
        "French": {"file": "en_fr.txt", "regex": r"([a-z]+)\s+([a-z]+) \("},
    },
    "German": {
        "Italian": {"file": "de_it.txt", "regex": r"([a-z]+)\s+([a-z]+)\s*$"},
    },
}

OUTPUT_FILE = "dicts.json"


def parse_word_pairs(lines, regex):
    """Extract word pairs from an iterable of text lines.

    Args:
        lines: Iterable of strings (for example an open text file).
        regex: Pattern string (or compiled pattern) with two groups; it is
            applied with ``match``, i.e. anchored at the start of each line.

    Returns:
        A tuple ``(pairs, kept, total)`` where ``pairs`` maps group 1 to
        group 2, ``kept`` is the number of matching lines and ``total`` the
        number of lines seen.  When a key occurs on several lines the last
        one wins, while ``kept`` still counts every matching line.
    """
    pattern = re.compile(regex)
    pairs = {}
    kept = 0
    total = 0
    for line in lines:
        total += 1
        found = pattern.match(line)
        if found is not None:
            kept += 1
            pairs[found.group(1)] = found.group(2)
    return pairs, kept, total


def build_dicts(fnames=None):
    """Read every file described by *fnames* and return the nested dicts.

    Args:
        fnames: Mapping shaped like ``FNAMES``; defaults to ``FNAMES``.

    Returns:
        ``{lang_from: {lang_to: {word: translation}}}``.

    Raises:
        IOError/OSError: If one of the configured files cannot be opened.
    """
    if fnames is None:
        fnames = FNAMES
    output = {}
    for lang_from in fnames:
        output[lang_from] = {}
        for lang_to in fnames[lang_from]:
            params = fnames[lang_from][lang_to]
            # ``with`` closes the file even if reading or matching raises.
            with open(params["file"]) as f:
                pairs, kept, total = parse_word_pairs(f, params["regex"])
            output[lang_from][lang_to] = pairs
            # Parenthesized single-argument form works on Python 2 and 3.
            print("%s -> %s: keeping %s words out of %s"
                  % (lang_from, lang_to, kept, total))
    return output


def main():
    """Build all dictionaries and write them to ``dicts.json``."""
    output = build_dicts()
    with open(OUTPUT_FILE, "w") as f_out:
        f_out.write("var dicts=" + json.dumps(output))


if __name__ == "__main__":
    main()