Spaces:
Build error
Build error
import text.ger_to_ipa as ipa | |
from text.ger_to_ipa import normalize_numbers | |
import re | |
from unidecode import unidecode | |
# List of (ipa, lazy ipa) pairs: | |
_lazy_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [ | |
('r', 'ɹ'), | |
('æ', 'e'), | |
('ɑ', 'a'), | |
('ɔ', 'o'), | |
('ð', 'z'), | |
('θ', 's'), | |
('ɛ', 'e'), | |
('ɪ', 'i'), | |
('ʊ', 'u'), | |
('ʒ', 'ʥ'), | |
('ʤ', 'ʥ'), | |
('ˈ', '↓'), | |
]] | |
# List of (ipa, lazy ipa2) pairs: | |
_lazy_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [ | |
('r', 'ɹ'), | |
('ʤ', 'dʒ'), | |
('ʧ', 'tʃ'), | |
('r', 'ɹ'), | |
('æ', 'e'), | |
('ɑ', 'a'), | |
('ɔ', 'o'), | |
('ð', 'z'), | |
('θ', 's'), | |
('ɛ', 'e'), | |
('ɪ', 'i'), | |
('ʊ', 'u'), | |
('ʒ', 'ʥ'), | |
('ʤ', 'ʥ'), | |
('ˈ', '↓'), | |
('ɡ', 'g'), | |
('ɔ', 'o'), | |
('ɪ','i') | |
]] | |
# List of (ipa, ipa2) pairs | |
_ipa_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [ | |
('æ', 'e'), | |
('ɑ', 'a'), | |
('ɔ', 'o'), | |
('β', 'ss'), | |
('ɛ', 'e'), | |
('ɪ', 'i'), | |
('ʊ', 'u'), | |
('ɡ', 'g'), | |
('ɔ', 'o'), | |
('ɪ','i'), | |
]] | |
def german_to_ipa(text):
    """Convert German text into a phoneme string.

    Steps, in order: transliterate with ``unidecode`` and lower-case,
    run ``normalize_numbers``, convert with ``ipa.convert``, collapse
    whitespace with ``ipa.collapse_whitespace``, then apply every
    substitution in ``_lazy_ipa2`` in list order.

    Args:
        text: The input text.

    Returns:
        The phoneme string after all ``_lazy_ipa2`` substitutions.
    """
    # NOTE(review): unidecode runs before the converter, so the converter
    # presumably only ever sees ASCII (umlauts/ß transliterated) -- confirm
    # this is what text.ger_to_ipa expects.
    normalized = normalize_numbers(unidecode(text).lower())
    result = ipa.collapse_whitespace(ipa.convert(normalized))
    for pattern, substitute in _lazy_ipa2:
        result = pattern.sub(substitute, result)
    return result
def german_to_lazy_ipa2(text):
    """Convert German text to the "lazy ipa2" phoneme form.

    Runs ``german_to_ipa`` on the input and then applies every substitution
    in ``_lazy_ipa2`` in list order to its result.

    Args:
        text: The input text.

    Returns:
        The phoneme string after the ``_lazy_ipa2`` substitutions.
    """
    # NOTE(review): german_to_ipa already applies _lazy_ipa2 itself, so this
    # loop re-applies the same table; it looks redundant -- confirm which of
    # the two functions is supposed to own the substitution.
    lazy = german_to_ipa(text)
    for pattern, substitute in _lazy_ipa2:
        lazy = pattern.sub(substitute, lazy)
    return lazy