# HYTTS / text / german.py
# 云淡风轻
# init
# cbf648c
import text.ger_to_ipa as ipa
from text.ger_to_ipa import normalize_numbers
import re
from unidecode import unidecode
# Substitution table from IPA symbols to their "lazy IPA" counterparts.
# Every entry is a (compiled regex, replacement string) pair.
_lazy_ipa = [
    (re.compile(symbol), lazy_symbol)
    for symbol, lazy_symbol in (
        ('r', 'ɹ'),
        ('æ', 'e'),
        ('ɑ', 'a'),
        ('ɔ', 'o'),
        ('ð', 'z'),
        ('θ', 's'),
        ('ɛ', 'e'),
        ('ɪ', 'i'),
        ('ʊ', 'u'),
        ('ʒ', 'ʥ'),
        ('ʤ', 'ʥ'),
        ('ˈ', '↓'),
    )
]
# Substitution table from IPA symbols to their "lazy IPA 2" counterparts.
# Every entry is a (compiled regex, replacement string) pair, and the rules
# are applied sequentially in list order.
#
# Order matters: 'ʤ' is first expanded to 'dʒ', and the later 'ʒ' -> 'ʥ' rule
# then turns that into 'dʥ'. A separate 'ʤ' -> 'ʥ' rule is deliberately not
# listed because no 'ʤ' can survive the expansion, so it could never match.
# Each source symbol appears exactly once; repeating a rule only adds a
# no-op regex pass over the text.
_lazy_ipa2 = [
    (re.compile(symbol), lazy_symbol)
    for symbol, lazy_symbol in (
        ('r', 'ɹ'),
        ('ʤ', 'dʒ'),
        ('ʧ', 'tʃ'),
        ('æ', 'e'),
        ('ɑ', 'a'),
        ('ɔ', 'o'),
        ('ð', 'z'),
        ('θ', 's'),
        ('ɛ', 'e'),
        ('ɪ', 'i'),
        ('ʊ', 'u'),
        ('ʒ', 'ʥ'),
        ('ˈ', '↓'),
        ('ɡ', 'g'),
    )
]
# Substitution table from IPA symbols to their "IPA 2" counterparts.
# Every entry is a (compiled regex, replacement string) pair. Each source
# symbol appears exactly once; a repeated rule could never match again after
# its first application.
#
# NOTE(review): 'β' below is the Greek/IPA beta, not the German eszett 'ß'.
# Mapping it to 'ss' looks like it may have been meant for 'ß' -- confirm
# against the symbols the IPA converter actually emits before changing it.
_ipa_to_ipa2 = [
    (re.compile(symbol), ipa2_symbol)
    for symbol, ipa2_symbol in (
        ('æ', 'e'),
        ('ɑ', 'a'),
        ('ɔ', 'o'),
        ('β', 'ss'),
        ('ɛ', 'e'),
        ('ɪ', 'i'),
        ('ʊ', 'u'),
        ('ɡ', 'g'),
    )
]
def german_to_ipa(text):
    """Convert German text to a phoneme string.

    The input is ASCII-folded with ``unidecode`` and lowercased, passed
    through ``normalize_numbers``, converted with ``ipa.convert``, cleaned
    with ``ipa.collapse_whitespace`` and finally rewritten with every rule
    in ``_lazy_ipa2``.

    Args:
        text: The German input text.

    Returns:
        The phoneme string after the ``_lazy_ipa2`` substitutions.
    """
    # NOTE(review): unidecode also folds umlauts and eszett to plain ASCII
    # before conversion -- confirm that this is what the converter expects.
    cleaned = normalize_numbers(unidecode(text).lower())
    ipa_text = ipa.collapse_whitespace(ipa.convert(cleaned))
    # NOTE(review): despite the function name, the "lazy IPA 2" table is
    # applied here, so the result is not plain IPA.
    for pattern, substitute in _lazy_ipa2:
        ipa_text = pattern.sub(substitute, ipa_text)
    return ipa_text
def german_to_lazy_ipa2(text):
    """Convert German text to its "lazy IPA 2" phoneme string.

    Args:
        text: The German input text.

    Returns:
        The output of ``german_to_ipa`` with every ``_lazy_ipa2`` rule
        applied to it, in list order.
    """
    phonemes = german_to_ipa(text)
    # The table is applied here regardless of what german_to_ipa has already
    # substituted, so this function does not depend on that detail.
    for pattern, substitute in _lazy_ipa2:
        phonemes = pattern.sub(substitute, phonemes)
    return phonemes