File size: 1,379 Bytes
9d61c9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from dataclasses import dataclass
from typing import Dict

from models.config import PreprocessLangType

# Language codes known to this module, in a fixed order (the position of a
# code in this list is its numeric ID, see the lookup tables below).
# The codes look like ISO 639-1 two-letter codes; the names in the trailing
# comments are the ISO names for those codes.
# TODO: only English has an entry in `langs_map` so far; the other codes
# listed here still need one.
SUPPORTED_LANGUAGES = [
    "bg",  # Bulgarian
    "cs",  # Czech
    "de",  # German
    "en",  # English
    "es",  # Spanish
    "fr",  # French
    "ha",  # Hausa
    "hr",  # Croatian
    "ko",  # Korean
    "pl",  # Polish
    "pt",  # Portuguese
    "ru",  # Russian
    "sv",  # Swedish
    "sw",  # Swahili
    "th",  # Thai
    "tr",  # Turkish
    "uk",  # Ukrainian
    "vi",  # Vietnamese
    "zh",  # Chinese
]

# Lookup tables between a language code and its index in SUPPORTED_LANGUAGES:
# numeric ID -> code, and the inverse code -> numeric ID.
id2lang = {idx: code for idx, code in enumerate(SUPPORTED_LANGUAGES)}
lang2id = {code: idx for idx, code in id2lang.items()}

@dataclass
class LangItem:
    r"""A class for storing language information.

    One instance bundles the per-language identifiers and settings that are
    stored in ``langs_map`` and returned by ``get_lang_map``.

    Attributes:
        phonemizer (str): Language identifier, presumably in the form a
            phonemizer backend expects (the English entry uses ``"en_us"``)
            -- TODO confirm against the consumer of this field.
        phonemizer_espeak (str): Language identifier, presumably in
            espeak's naming scheme (the English entry uses ``"en-us"``)
            -- TODO confirm.
        nemo (str): Language identifier, presumably for NeMo-based tooling
            (the English entry uses ``"en"``) -- TODO confirm.
        processing_lang_type (PreprocessLangType): Preprocessing mode for
            the language, typed by ``models.config.PreprocessLangType``
            (the English entry uses ``"english_only"``).
    """

    phonemizer: str
    phonemizer_espeak: str
    nemo: str
    processing_lang_type: PreprocessLangType

# Registry of configured languages, keyed by language code. Entries are
# added one by one below; `get_lang_map` rejects any code without an entry.
langs_map: Dict[str, LangItem] = {}

# English is currently the only configured language.
langs_map["en"] = LangItem(
    phonemizer="en_us",
    phonemizer_espeak="en-us",
    nemo="en",
    # Plain string; presumably one of the values allowed by
    # PreprocessLangType -- confirm against models.config.
    processing_lang_type="english_only",
)

def get_lang_map(lang: str) -> LangItem:
    r"""Look up the configuration registered for a language code.

    Args:
        lang (str): Key to look up in ``langs_map``.

    Raises:
        ValueError: If ``lang`` has no entry in ``langs_map``.

    Returns:
        LangItem: The entry stored in ``langs_map`` under ``lang``.
    """
    # Happy path first; any code without an entry falls through to the error.
    if lang in langs_map:
        return langs_map[lang]
    raise ValueError(f"Language {lang} is not supported!")