# DiffRhythm/diffrhythm/g2p/g2p/text_tokenizers.py
# (Residue of the hosting page's file-viewer header: ing0, "infer", b96e750, 3.2 kB.)
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import re
import os
from typing import List, Pattern, Union
from phonemizer.utils import list2str, str2list
from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.language_switch import LanguageSwitch
from phonemizer.backend.espeak.words_mismatch import WordMismatch
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator
class TextTokenizer:
    """Phonemize text with an espeak backend after normalizing punctuation.

    Each input line is stripped, Chinese / full-width punctuation is mapped
    to its ASCII counterpart, every character that is not a word character,
    whitespace or one of the preserved punctuation marks is removed, and the
    result is passed to ``EspeakBackend.phonemize``. In the phonemized output
    every preserved punctuation mark is delimited by ``|``.
    """

    # Pre-compiled patterns shared by all instances. The punctuation set in
    # these patterns mirrors ``preserve_punctuation_marks`` set in __init__.
    _DISALLOWED_RE = re.compile(r"[^\w\s_,\.\?!;:\'…]")
    _PUNCT_PADDING_RE = re.compile(r"\s*([,\.\?!;:\'…])\s*")
    _WHITESPACE_RE = re.compile(r"\s+")
    _PUNCT_RE = re.compile(r"([,\.\?!;:\'…])")
    _PIPE_RUN_RE = re.compile(r"\|+")

    # Single-character punctuation substitutions. The full-width forms are
    # written as escapes on purpose: a width-folding copy/paste or text
    # normalizer would otherwise silently turn them into their ASCII
    # look-alikes and make the substitution a no-op.
    _PUNCTUATION_TABLE = str.maketrans(
        {
            "\uff0c": ",",  # fullwidth comma
            "。": ".",
            "\uff01": "!",  # fullwidth exclamation mark
            "\uff1f": "?",  # fullwidth question mark
            "\uff1b": ";",  # fullwidth semicolon
            "\uff1a": ":",  # fullwidth colon
            "、": ",",
            "‘": "'",
            "’": "'",
            "⋯": "…",
        }
    )

    # Multi-character ellipsis spellings collapsed to a single "…". Applied
    # after the table above so that "。。。" (-> "...") is collapsed as well.
    _ELLIPSIS_SEQUENCES = ("···", "・・・", "...")

    def __init__(
        self,
        language="en-us",
        backend="espeak",
        separator=Separator(word="|_|", syllable="-", phone="|"),
        preserve_punctuation=True,
        with_stress: bool = False,
        tie: Union[bool, str] = False,
        language_switch: Union[str, LanguageSwitch] = "remove-flags",
        words_mismatch: Union[str, WordMismatch] = "ignore",
    ) -> None:
        """Create the espeak backend used for phonemization.

        Args:
            language: Language code forwarded to ``EspeakBackend``.
            backend: Accepted for interface compatibility; it is not read,
                an ``EspeakBackend`` is always created.
            separator: Separator passed to ``phonemize`` on every call.
                The default instance is created once, when the class is
                defined, and is shared by all tokenizers that rely on it.
            preserve_punctuation: Forwarded to ``EspeakBackend``.
            with_stress: Forwarded to ``EspeakBackend``.
            tie: Forwarded to ``EspeakBackend``.
            language_switch: Forwarded to ``EspeakBackend``.
            words_mismatch: Forwarded to ``EspeakBackend``.
        """
        self.preserve_punctuation_marks = ",.?!;:'…"
        self.backend = EspeakBackend(
            language,
            punctuation_marks=self.preserve_punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            with_stress=with_stress,
            tie=tie,
            language_switch=language_switch,
            words_mismatch=words_mismatch,
        )
        self.separator = separator

    def convert_chinese_punctuation(self, text: str) -> str:
        """Return *text* with Chinese punctuation replaced by English punctuation.

        Full-width and ideographic marks become their ASCII counterparts,
        curly single quotes become ``'``, and the various ellipsis
        spellings (``⋯``, ``···``, ``・・・``, ``...``) become ``…``.
        """
        text = text.translate(self._PUNCTUATION_TABLE)
        for sequence in self._ELLIPSIS_SEQUENCES:
            text = text.replace(sequence, "…")
        return text

    def _normalize_line(self, line: str) -> str:
        """Clean one input line before it is handed to the backend."""
        line = self.convert_chinese_punctuation(line.strip())
        # Drop everything except word characters, whitespace and the
        # preserved punctuation marks.
        line = self._DISALLOWED_RE.sub("", line)
        # Remove whitespace around punctuation, then collapse what is left.
        line = self._PUNCT_PADDING_RE.sub(r"\1", line)
        return self._WHITESPACE_RE.sub(" ", line)

    def _mark_punctuation(self, phonemes: str) -> str:
        """Delimit punctuation with single ``|`` and drop trailing ``|``."""
        phonemes = self._PUNCT_RE.sub(r"|\1|", phonemes)
        # Wrapping can produce runs such as "||" next to an existing
        # separator; keep exactly one.
        phonemes = self._PIPE_RUN_RE.sub("|", phonemes)
        return phonemes.rstrip("|")

    def __call__(self, text, strip=True) -> Union[str, List[str]]:
        """Phonemize *text*.

        Args:
            text: A string (split into lines by ``str2list``) or a list of
                strings.
            strip: Forwarded to the backend's ``phonemize``.

        Returns:
            A single string when *text* is a string, otherwise a list with
            one phonemized string per input item.
        """
        normalized_text = [self._normalize_line(line) for line in str2list(text)]
        phonemized = self.backend.phonemize(
            normalized_text, separator=self.separator, strip=strip, njobs=1
        )
        if isinstance(text, str):
            return self._mark_punctuation(list2str(phonemized))
        return [self._mark_punctuation(item) for item in phonemized]