|
|
|
|
|
|
|
|
|
|
|
from phonemizer.backend import EspeakBackend |
|
from phonemizer.separator import Separator |
|
from phonemizer.utils import list2str, str2list |
|
from typing import List, Union |
|
import os |
|
import json |
|
import sys |
|
|
|
|
|
# Word / syllable / phone separators handed to every backend call made
# through ``phonemizer_g2p``.
separator = Separator(phone=" ", syllable="|", word=" _ ")

# Keyword options shared by all espeak backends created below.
_ESPEAK_OPTIONS = {
    "preserve_punctuation": False,
    "with_stress": False,
    "language_switch": "remove-flags",
}

# One backend per supported language, created once at import time.
# The first argument is the espeak voice name, which differs from the short
# language code used as the ``lang2backend`` key (e.g. "zh" -> "cmn").
phonemizer_zh = EspeakBackend("cmn", **_ESPEAK_OPTIONS)
phonemizer_en = EspeakBackend("en-us", **_ESPEAK_OPTIONS)
phonemizer_ja = EspeakBackend("ja", **_ESPEAK_OPTIONS)
phonemizer_ko = EspeakBackend("ko", **_ESPEAK_OPTIONS)
phonemizer_fr = EspeakBackend("fr-fr", **_ESPEAK_OPTIONS)
phonemizer_de = EspeakBackend("de", **_ESPEAK_OPTIONS)

# Maps the language code accepted by ``phonemizer_g2p`` to its backend.
lang2backend = dict(
    zh=phonemizer_zh,
    ja=phonemizer_ja,
    en=phonemizer_en,
    fr=phonemizer_fr,
    ko=phonemizer_ko,
    de=phonemizer_de,
)
|
|
|
# Load the symbol table that ``phonemizer_g2p`` uses to look up each phoneme
# symbol. NOTE: the path is relative to the current working directory, so the
# module must be imported with the repository root as the working directory.
#
# The encoding is given explicitly: JSON text is UTF-8, whereas the default
# encoding of ``open`` depends on the platform locale and could fail to decode
# (or mis-decode) non-ASCII symbols — presumably IPA characters; verify against
# the data file.
with open("./diffrhythm/g2p/utils/mls_en.json", "r", encoding="utf-8") as f:
    json_data = f.read()

token = json.loads(json_data)
|
|
|
|
|
def phonemizer_g2p(text, language):
    """Phonemize ``text`` and look up every resulting symbol in ``token``.

    Args:
        text: A string or a list of strings to phonemize.
        language: Key into ``lang2backend`` selecting the backend.

    Returns:
        A ``(phonemes, token_id)`` tuple. ``phonemes`` is whatever
        ``_phonemize`` returned. When that is a list, ``token_id`` is a list
        holding one list of looked-up values per entry; otherwise it is a
        single flat list.

    Raises:
        KeyError: If ``language`` is not a key of ``lang2backend``.
    """
    backend = lang2backend[language]
    phonemes = _phonemize(
        backend,
        text,
        separator,
        strip=True,
        njobs=1,
        prepend_text=False,
        preserve_empty_lines=False,
    )

    def _lookup(phone_string):
        # Split on single spaces; symbols absent from ``token`` are dropped
        # silently rather than raising.
        return [token[symbol] for symbol in phone_string.split(" ") if symbol in token]

    if not isinstance(phonemes, list):
        return phonemes, _lookup(phonemes)
    return phonemes, [_lookup(entry) for entry in phonemes]
|
|
|
|
|
def _phonemize(
    backend,
    text: Union[str, List[str]],
    separator: Separator,
    strip: bool,
    njobs: int,
    prepend_text: bool,
    preserve_empty_lines: bool,
):
    """Phonemize ``text`` with ``backend`` and return the phonemized text.

    Args:
        backend: Object exposing ``phonemize(lines, separator=, strip=, njobs=)``.
        text: A string or a list of strings to phonemize.
        separator: Forwarded unchanged to ``backend.phonemize``.
        strip: Forwarded unchanged to ``backend.phonemize``.
        njobs: Forwarded unchanged to ``backend.phonemize``.
        prepend_text: When true, return ``(input_line, phonemized_line)``
            pairs instead of the phonemized lines alone.
        preserve_empty_lines: When true, blank input lines are re-inserted as
            empty strings at their original positions in the output.

    Returns:
        A list of ``(text, phonemized)`` tuples when ``prepend_text`` is true.
        Otherwise the result of ``list2str`` on the phonemized lines when
        ``text`` was a string, or the list of phonemized lines when it was not.

    Errors raised by ``backend.phonemize`` are not caught here and propagate
    to the caller.
    """
    # Remember the kind of input so the result can be handed back in the same
    # form. ``isinstance`` (not an exact type comparison) so that instances of
    # ``str`` subclasses are also returned as a string.
    input_is_str = isinstance(text, str)

    # Work on a list of lines with surrounding line separators removed.
    text = [line.strip(os.linesep) for line in str2list(text)]

    # Positions of blank lines, recorded before they are filtered out.
    # Left empty when they do not need to be restored.
    empty_lines = (
        [n for n, line in enumerate(text) if not line.strip()]
        if preserve_empty_lines
        else []
    )

    # Only non-blank lines are sent to the backend.
    text = [line for line in text if line.strip()]

    if text:
        phonemized = backend.phonemize(
            text, separator=separator, strip=strip, njobs=njobs
        )
    else:
        phonemized = []

    # Re-insert blank lines in ascending index order so the recorded
    # positions stay valid. ``text`` is only realigned when it is part of the
    # returned value.
    for i in empty_lines:
        if prepend_text:
            text.insert(i, "")
        phonemized.insert(i, "")

    if prepend_text:
        return list(zip(text, phonemized))
    if input_is_str:
        return list2str(phonemized)
    return phonemized
|
|