Spaces:
Build error
Build error
File size: 1,554 Bytes
cbf648c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import text.ger_to_ipa as ipa
from text.ger_to_ipa import normalize_numbers
import re
from unidecode import unidecode
# "Lazy IPA" substitution table.
# Each entry is a (compiled pattern, replacement) pair; every pattern is a
# single literal character, so one entry rewrites one IPA symbol.
# Not referenced by the functions visible in this file.
_lazy_ipa = [
    (re.compile(symbol), substitute)
    for symbol, substitute in (
        ('r', 'ɹ'),
        ('æ', 'e'),
        ('ɑ', 'a'),
        ('ɔ', 'o'),
        ('ð', 'z'),
        ('θ', 's'),
        ('ɛ', 'e'),
        ('ɪ', 'i'),
        ('ʊ', 'u'),
        ('ʒ', 'ʥ'),
        ('ʤ', 'ʥ'),
        ('ˈ', '↓'),
    )
]
# "Lazy IPA 2" substitution table, applied in list order by german_to_ipa
# and german_to_lazy_ipa2.
# Each entry is a (compiled pattern, replacement) pair; every pattern is a
# single literal character.
#
# Order matters: 'ʤ' is first expanded to 'dʒ', and the later 'ʒ' -> 'ʥ' rule
# then rewrites that 'ʒ', so 'ʤ' ultimately becomes 'dʥ'.
# NOTE(review): confirm that 'dʥ' (rather than 'dʒ') is the intended result.
#
# Entries that could never match (a second 'r', 'ɔ', 'ɪ', and a 'ʤ' rule
# placed after every 'ʤ' had already been consumed) have been dropped; the
# output for any input string is unchanged.
_lazy_ipa2 = [
    (re.compile(symbol), substitute)
    for symbol, substitute in (
        ('r', 'ɹ'),
        ('ʤ', 'dʒ'),
        ('ʧ', 'tʃ'),
        ('æ', 'e'),
        ('ɑ', 'a'),
        ('ɔ', 'o'),
        ('ð', 'z'),
        ('θ', 's'),
        ('ɛ', 'e'),
        ('ɪ', 'i'),
        ('ʊ', 'u'),
        ('ʒ', 'ʥ'),
        ('ˈ', '↓'),
        ('ɡ', 'g'),
    )
]
# IPA -> "IPA 2" substitution table.
# Each entry is a (compiled pattern, replacement) pair; every pattern is a
# single literal character.
# Not referenced by the functions visible in this file.
#
# NOTE(review): 'β' below is the Greek letter beta, not the German 'ß' --
# confirm which character the upstream converter actually emits.
#
# Repeated 'ɔ' and 'ɪ' entries have been dropped: the first occurrence already
# replaces every match, so the duplicates could never fire.
_ipa_to_ipa2 = [
    (re.compile(symbol), substitute)
    for symbol, substitute in (
        ('æ', 'e'),
        ('ɑ', 'a'),
        ('ɔ', 'o'),
        ('β', 'ss'),
        ('ɛ', 'e'),
        ('ɪ', 'i'),
        ('ʊ', 'u'),
        ('ɡ', 'g'),
    )
]
def german_to_ipa(text):
    """Convert German text to a phoneme string and apply `_lazy_ipa2`.

    Pipeline: `unidecode` + lowercase, `normalize_numbers`, `ipa.convert`,
    `ipa.collapse_whitespace`, then every `_lazy_ipa2` substitution in
    list order.

    NOTE(review): `unidecode` runs before the German conversion, so
    non-ASCII letters (presumably including umlauts and 'ß') are
    transliterated before `ipa.convert` sees them -- confirm this is
    intended.
    """
    normalized = normalize_numbers(unidecode(text).lower())
    result = ipa.collapse_whitespace(ipa.convert(normalized))
    # Each table entry is (compiled pattern, replacement); order matters.
    for pattern, substitute in _lazy_ipa2:
        result = pattern.sub(substitute, result)
    return result
def german_to_lazy_ipa2(text):
    """Return `german_to_ipa(text)` with the `_lazy_ipa2` table applied to it.

    `german_to_ipa` itself already runs the `_lazy_ipa2` substitutions, so
    this is a second pass over its output with the same table.
    """
    converted = german_to_ipa(text)
    # Apply each (compiled pattern, replacement) pair in list order.
    for pattern, substitute in _lazy_ipa2:
        converted = pattern.sub(substitute, converted)
    return converted
|