nickovchinnikov's picture
Init
9d61c9b
from dataclasses import dataclass
from typing import Dict
from models.config import PreprocessLangType
# TODO: only English is currently supported; support for the other languages still needs to be added.
# Language codes known to this module. The position of a code in this list
# is the numeric ID assigned to it by the lookup tables below, so reordering
# or inserting entries changes those IDs.
SUPPORTED_LANGUAGES = [
    "bg",
    "cs",
    "de",
    "en",
    "es",
    "fr",
    "ha",
    "hr",
    "ko",
    "pl",
    "pt",
    "ru",
    "sv",
    "sw",
    "th",
    "tr",
    "uk",
    "vi",
    "zh",
]

# Lookup tables between language codes and their numeric IDs:
#   id2lang: list index -> language code
#   lang2id: language code -> list index (the inverse of id2lang)
id2lang = dict(enumerate(SUPPORTED_LANGUAGES))
lang2id = {code: idx for idx, code in id2lang.items()}
@dataclass
class LangItem:
    r"""Bundle of per-language identifiers and the preprocessing mode.

    The field names suggest that each string is the language code expected by
    a particular backend; the consumers are not visible in this file, so
    confirm the exact meaning against the callers.

    Attributes:
        phonemizer (str): Language code in underscore form (e.g. ``"en_us"``).
        phonemizer_espeak (str): Language code in hyphenated form (e.g. ``"en-us"``).
        nemo (str): Short language code (e.g. ``"en"``).
        processing_lang_type (PreprocessLangType): Preprocessing mode selected
            for the language (e.g. ``"english_only"``).
    """

    # Field order defines the positional constructor signature; keep it stable.
    phonemizer: str
    phonemizer_espeak: str
    nemo: str
    processing_lang_type: PreprocessLangType
# Registry of configured languages, keyed by language code.
# Only "en" has an entry here, even though SUPPORTED_LANGUAGES lists more
# codes; any other code is rejected by get_lang_map.
langs_map: Dict[str, LangItem] = {
    "en": LangItem(
        phonemizer="en_us",
        phonemizer_espeak="en-us",
        nemo="en",
        processing_lang_type="english_only",
    ),
}
def get_lang_map(lang: str) -> LangItem:
    r"""Look up the configuration entry registered for a language code.

    Args:
        lang (str): Language code used as a key of ``langs_map`` (e.g. ``"en"``).

    Returns:
        LangItem: The entry stored in ``langs_map`` under ``lang``.

    Raises:
        ValueError: If ``lang`` is not a key of ``langs_map``.
    """
    if lang in langs_map:
        return langs_map[lang]

    # Unknown code: report it as a ValueError rather than leaking a KeyError.
    raise ValueError(f"Language {lang} is not supported!")