Spaces:
Running
Running
File size: 5,144 Bytes
9b2107c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import importlib
from typing import List
import gruut
from gruut_ipa import IPA
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
from TTS.tts.utils.text.punctuation import Punctuation
# Table for str.translate to fix gruut/TTS phoneme mismatch
GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")
class Gruut(BasePhonemizer):
"""Gruut wrapper for G2P
Args:
language (str):
Valid language code for the used backend.
punctuations (str):
Characters to be treated as punctuation. Defaults to `Punctuation.default_puncs()`.
keep_puncs (bool):
If true, keep the punctuations after phonemization. Defaults to True.
use_espeak_phonemes (bool):
If true, use espeak lexicons instead of default Gruut lexicons. Defaults to False.
keep_stress (bool):
If true, keep the stress characters after phonemization. Defaults to False.
Example:
>>> from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
>>> phonemizer = Gruut('en-us')
>>> phonemizer.phonemize("Be a voice, not an! echo?", separator="|")
'b|i| ə| v|ɔ|ɪ|s, n|ɑ|t| ə|n! ɛ|k|o|ʊ?'
"""
def __init__(
self,
language: str,
punctuations=Punctuation.default_puncs(),
keep_puncs=True,
use_espeak_phonemes=False,
keep_stress=False,
):
super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
self.use_espeak_phonemes = use_espeak_phonemes
self.keep_stress = keep_stress
@staticmethod
def name():
return "gruut"
def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str: # pylint: disable=unused-argument
"""Convert input text to phonemes.
Gruut phonemizes the given `str` by seperating each phoneme character with `separator`, even for characters
that constitude a single sound.
It doesn't affect 🐸TTS since it individually converts each character to token IDs.
Examples::
"hello how are you today?" -> `h|ɛ|l|o|ʊ| h|a|ʊ| ɑ|ɹ| j|u| t|ə|d|e|ɪ`
Args:
text (str):
Text to be converted to phonemes.
tie (bool, optional) : When True use a '͡' character between
consecutive characters of a single phoneme. Else separate phoneme
with '_'. This option requires espeak>=1.49. Default to False.
"""
ph_list = []
for sentence in gruut.sentences(text, lang=self.language, espeak=self.use_espeak_phonemes):
for word in sentence:
if word.is_break:
# Use actual character for break phoneme (e.g., comma)
if ph_list:
# Join with previous word
ph_list[-1].append(word.text)
else:
# First word is punctuation
ph_list.append([word.text])
elif word.phonemes:
# Add phonemes for word
word_phonemes = []
for word_phoneme in word.phonemes:
if not self.keep_stress:
# Remove primary/secondary stress
word_phoneme = IPA.without_stress(word_phoneme)
word_phoneme = word_phoneme.translate(GRUUT_TRANS_TABLE)
if word_phoneme:
# Flatten phonemes
word_phonemes.extend(word_phoneme)
if word_phonemes:
ph_list.append(word_phonemes)
ph_words = [separator.join(word_phonemes) for word_phonemes in ph_list]
ph = f"{separator} ".join(ph_words)
return ph
def _phonemize(self, text, separator):
return self.phonemize_gruut(text, separator, tie=False)
def is_supported_language(self, language):
"""Returns True if `language` is supported by the backend"""
return gruut.is_language_supported(language)
@staticmethod
def supported_languages() -> List:
"""Get a dictionary of supported languages.
Returns:
List: List of language codes.
"""
return list(gruut.get_supported_languages())
def version(self):
"""Get the version of the used backend.
Returns:
str: Version of the used backend.
"""
return gruut.__version__
@classmethod
def is_available(cls):
"""Return true if ESpeak is available else false"""
return importlib.util.find_spec("gruut") is not None
if __name__ == "__main__":
e = Gruut(language="en-us")
print(e.supported_languages())
print(e.version())
print(e.language)
print(e.name())
print(e.is_available())
e = Gruut(language="en-us", keep_puncs=False)
print("`" + e.phonemize("hello how are you today?") + "`")
e = Gruut(language="en-us", keep_puncs=True)
print("`" + e.phonemize("hello how, are you today?") + "`")
|