"""German text to IPA conversion with simplified ("lazy") symbol mappings."""

import re

from unidecode import unidecode

import text.ger_to_ipa as ipa
from text.ger_to_ipa import normalize_numbers


def _compile_pairs(pairs):
    """Return ``pairs`` with the source side of each pair compiled as a regex."""
    return [(re.compile(source), replacement) for source, replacement in pairs]


def _substitute(text, table):
    """Apply every (pattern, replacement) pair of ``table`` to ``text``, in order."""
    for pattern, replacement in table:
        text = pattern.sub(replacement, text)
    return text


# List of (ipa, lazy ipa) pairs:
_lazy_ipa = _compile_pairs([
    ('r', 'ɹ'),
    ('æ', 'e'),
    ('ɑ', 'a'),
    ('ɔ', 'o'),
    ('ð', 'z'),
    ('θ', 's'),
    ('ɛ', 'e'),
    ('ɪ', 'i'),
    ('ʊ', 'u'),
    ('ʒ', 'ʥ'),
    ('ʤ', 'ʥ'),
    ('ˈ', '↓'),
])

# List of (ipa, lazy ipa2) pairs.
# Order matters: 'ʤ' is first expanded to 'dʒ', and the later 'ʒ' rule then
# turns that into 'dʥ'. Entries that could never match (repeated sources, and
# a second 'ʤ' rule placed after 'ʤ' has already been rewritten) were dropped;
# the resulting substitutions are unchanged.
_lazy_ipa2 = _compile_pairs([
    ('r', 'ɹ'),
    ('ʤ', 'dʒ'),
    ('ʧ', 'tʃ'),
    ('æ', 'e'),
    ('ɑ', 'a'),
    ('ɔ', 'o'),
    ('ð', 'z'),
    ('θ', 's'),
    ('ɛ', 'e'),
    ('ɪ', 'i'),
    ('ʊ', 'u'),
    ('ʒ', 'ʥ'),
    ('ˈ', '↓'),
    ('ɡ', 'g'),
])

# List of (ipa, ipa2) pairs (repeated 'ɔ' and 'ɪ' entries removed; they were
# no-ops because the first occurrence already consumed every match).
# NOTE(review): 'β' below is the Greek/IPA beta, not the German 'ß' -- confirm
# which character was intended.
_ipa_to_ipa2 = _compile_pairs([
    ('æ', 'e'),
    ('ɑ', 'a'),
    ('ɔ', 'o'),
    ('β', 'ss'),
    ('ɛ', 'e'),
    ('ɪ', 'i'),
    ('ʊ', 'u'),
    ('ɡ', 'g'),
])


def german_to_ipa(text: str) -> str:
    """Convert German text to a phoneme string.

    The input is transliterated with ``unidecode`` and lowercased, numbers
    are normalized with ``normalize_numbers``, and the result is converted
    with ``ipa.convert`` and passed through ``ipa.collapse_whitespace``.
    Finally the ``_lazy_ipa2`` substitutions are applied, so the returned
    string already uses the simplified symbol set.

    Args:
        text: Raw input text.

    Returns:
        The phoneme string after the ``_lazy_ipa2`` substitutions.
    """
    # NOTE(review): unidecode transliterates to ASCII, so umlauts and 'ß'
    # presumably never reach the converter as such -- confirm that this is
    # what text.ger_to_ipa expects.
    text = unidecode(text).lower()
    text = normalize_numbers(text)
    phonemes = ipa.collapse_whitespace(ipa.convert(text))
    return _substitute(phonemes, _lazy_ipa2)


def german_to_lazy_ipa2(text: str) -> str:
    """Convert German text to the simplified ("lazy") IPA2 symbol set.

    ``german_to_ipa`` already applies ``_lazy_ipa2``. After one pass over
    that table none of the characters it matches remain in the string, so
    running the table again (as this function used to) changed nothing.
    The function therefore simply delegates.

    Args:
        text: Raw input text.

    Returns:
        The same string ``german_to_ipa`` returns for ``text``.
    """
    # NOTE(review): if german_to_ipa is ever changed to return unsimplified
    # IPA, re-apply _lazy_ipa2 here.
    return german_to_ipa(text)