import itertools
from html import ampersand_encode
class WordPart(object):
    """A contiguous slice ``word[start:end]`` of a word, plus the text around it."""

    def __init__(self, word, start, end):
        """Store *word* and the slice bounds used as ``word[start:end]``."""
        self._word = word
        self._start = start
        self._end = end

    @property
    def pre(self):
        """Text of the word before the slice (``word[:start]``)."""
        return self._word[:self._start]

    @property
    def part(self):
        """Text of the slice itself (``word[start:end]``)."""
        return self._word[self._start:self._end]

    @property
    def post(self):
        """Text of the word after the slice (``word[end:]``)."""
        return self._word[self._end:]

    def to_html(self):
        """Return ``pre``, ``part`` and ``post`` formatted into a single string."""
        # NOTE(review): the format string below contains no markup around the
        # three pieces; confirm against the canonical source whether tags
        # (e.g. highlighting of ``part``) are supposed to be here.
        return '%s%s%s' % (self.pre,
                           self.part,
                           self.post)

    def _key(self):
        """Return the tuple that equality and hashing are based on."""
        return (self.pre, self.part, self.post)

    def __eq__(self, other):
        """Compare by ``pre`` / ``part`` / ``post``.

        Objects that do not expose those three attributes are "not
        comparable" (``NotImplemented``) rather than an ``AttributeError``,
        so ``part == 5`` evaluates to ``False``.
        """
        try:
            other_key = (other.pre, other.part, other.post)
        except AttributeError:
            return NotImplemented
        return self._key() == other_key

    # Defining __eq__ alone would leave instances unhashable; keep hashing
    # consistent with equality.
    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return '%s(%r, %r, %r)' % (type(self).__name__,
                                   self._word, self._start, self._end)

    def has_same_word(self, other):
        """Return True when *other* was built from an equal word."""
        return self._word == other._word

    def is_compatible_with(self, other):
        """Return True when *other* covers the same word and starts where this part ends."""
        return self.has_same_word(other) and self._end == other._start

    def combine(self, other):
        """Return a new part spanning from this part's start to *other*'s end.

        Raises:
            ValueError: if *other* is not compatible (see ``is_compatible_with``).
        """
        if not self.is_compatible_with(other):
            raise ValueError('cannot combine %r with %r: parts must belong to '
                             'the same word and be adjacent' % (self, other))
        return WordPart(self._word, self._start, other._end)
class Sound(str):
    """A string of IPA text annotated with where it came from.

    The string value is the IPA text itself. Each instance additionally
    carries an optional original position, an optional word part (an object
    providing ``pre``, ``to_html()``, ``is_compatible_with()`` and
    ``combine()``) and a transliteration string.
    """

    def __new__(cls, ipa, orig_pos=None, word_part=None, transliteration=None):
        """Create the string from *ipa* and attach the annotations.

        A falsy *transliteration* is stored as the empty string.
        """
        self = super().__new__(cls, ipa)
        self._orig_pos = orig_pos
        self._word_part = word_part
        self._transliteration = transliteration or ''
        return self

    def _get_original_position(self):
        return self._orig_pos

    original_position = property(
        _get_original_position,
        doc="The ``orig_pos`` value given at construction (may be None).")

    def to_html(self, include_ipa=True, include_word_part=True, include_transliteration=True):
        """Render the selected pieces of this sound as one string.

        Args:
            include_ipa: append the IPA text, passed through ``ampersand_encode``.
            include_word_part: prefix rows with the word part's ``pre`` text
                and append the word part's own ``to_html()`` output. Ignored
                when the sound has no word part.
            include_transliteration: append the transliteration.
        """
        # NOTE(review): in the reviewed source every literal in this method was
        # an unterminated string split across physical lines with no markup in
        # it -- most likely HTML tags were stripped upstream. The line breaks
        # are kept here as explicit "\n" escapes so the module parses; restore
        # the intended markup from the canonical source.
        #
        # A sound built without a word part (the constructor default) used to
        # fail with AttributeError here; it now simply omits those pieces.
        show_word_part = include_word_part and self._word_part is not None

        blank_space = ('\n%s' % self._word_part.pre) if show_word_part else ''

        pieces = ['\n\n']
        if include_ipa:
            pieces.append('\n%s%s\n\n' % (blank_space, ampersand_encode(self)))
        if include_transliteration:
            pieces.append('\n%s%s\n\n' % (blank_space, self._transliteration))
        if show_word_part:
            pieces.append('\n%s\n\n' % self._word_part.to_html())
        pieces.append('\n')
        return ''.join(pieces)

    def is_compatible_with(self, other):
        """Return whether this sound's word part is compatible with *other*'s.

        Sounds lacking a word part are never compatible (previously this
        raised AttributeError on ``None``).
        """
        mine, theirs = self._word_part, other._word_part
        if mine is None or theirs is None:
            return False
        return mine.is_compatible_with(theirs)

    def combine(self, other):
        """Return a new Sound whose text is ``self + other`` over the combined word part.

        Raises:
            ValueError: if the two sounds are not compatible.
        """
        if not self.is_compatible_with(other):
            raise ValueError('cannot combine %r with %r: their word parts are '
                             'not compatible' % (str(self), str(other)))
        # NOTE(review): the result carries neither original_position nor a
        # transliteration from its inputs -- confirm that this is intended.
        return Sound(self + other, word_part=self._word_part.combine(other._word_part))

    @classmethod
    def reduce_list(cls, sound_list):
        """Merge each run of consecutive compatible sounds into a single Sound.

        Returns a new list; an empty input yields an empty list.
        """
        rtn = []
        cur = None
        for sound in sound_list:
            if cur is None:
                cur = sound
            elif cur.is_compatible_with(sound):
                cur = cur.combine(sound)
            else:
                rtn.append(cur)
                cur = sound
        if cur is not None:
            rtn.append(cur)
        return rtn
def _make_word_parts(word_chunks):
    """Yield one WordPart per chunk, all sharing the same full word.

    The full word is the concatenation of *word_chunks* with leading and
    trailing ``'.'`` / ``' '`` characters stripped. Each yielded part covers
    the span its chunk occupies inside that stripped word.
    """
    # Materialise first: join() below would otherwise exhaust a one-shot
    # iterator and leave the loop with nothing to yield.
    chunks = list(word_chunks)
    joined = ''.join(chunks)
    full_word = joined.strip('. ')

    # Offsets are counted on the unstripped text, so shift them left by the
    # number of leading characters that strip() removed; otherwise every
    # slice would be misaligned whenever the first chunk starts with '.' or
    # ' '. Clamp at 0 so a negative value is never read as "from the end".
    # (Offsets past the end of the stripped word are harmless when slicing.)
    leading = len(joined) - len(joined.lstrip('. '))

    position = 0
    for chunk in chunks:
        next_position = position + len(chunk)
        yield WordPart(full_word,
                       max(position - leading, 0),
                       max(next_position - leading, 0))
        position = next_position
def make_sound_list(ipa_words_chunk_list, words_chunk_list, transliteration_table=None):
    """Build a flat list of Sound objects from per-word IPA and spelling chunks.

    Args:
        ipa_words_chunk_list: one iterable of IPA chunks per word.
        words_chunk_list: one iterable of spelling chunks per word, paired
            positionally with *ipa_words_chunk_list*.
        transliteration_table: optional mapping from IPA chunk to its
            transliteration; chunks missing from it get no transliteration.

    Returns:
        A list of Sound, numbered consecutively across all words via their
        original position. Pairing uses ``zip``, so any surplus on the longer
        side (words, or chunks within a word) is dropped.
    """
    # None instead of a shared mutable ``{}`` default.
    table = {} if transliteration_table is None else transliteration_table

    chunk_pairs = itertools.chain.from_iterable(
        zip(ipa_chunks, _make_word_parts(word_chunks))
        for ipa_chunks, word_chunks in zip(ipa_words_chunk_list, words_chunk_list)
    )
    return [
        Sound(ipa_chunk, position, word_part, table.get(ipa_chunk))
        for position, (ipa_chunk, word_part) in enumerate(chunk_pairs)
    ]