Jekyll2000 committed on
Commit
36e11f1
·
verified ·
1 Parent(s): 6888976

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +340 -0
app.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ import sherpa_onnx
4
+ from huggingface_hub import hf_hub_download
5
+
6
+
7
def get_file(
    repo_id: str,
    filename: str,
    subfolder: str = ".",
) -> str:
    """Fetch a single file from a Hugging Face Hub repository.

    Thin wrapper that forwards its arguments to ``hf_hub_download``.

    Args:
        repo_id: Repository identifier handed to ``hf_hub_download``.
        filename: Name of the file to fetch from the repository.
        subfolder: Sub-directory inside the repository; defaults to ``"."``.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file
        (annotated here as ``str``).
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
18
+
19
+
20
@lru_cache(maxsize=10)
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for the ``csukuangfj/vits-vctk`` model.

    Fetches ``vits-vctk.onnx``, ``lexicon.txt`` and ``tokens.txt`` from the
    repository via ``get_file`` and wires them into a VITS configuration.
    Results are memoised by ``lru_cache`` per ``(repo_id, speed)`` pair.

    Args:
        repo_id: Must be exactly ``"csukuangfj/vits-vctk"``.
        speed: Divisor applied to the length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` instance configured for CPU inference.

    Raises:
        ValueError: If ``repo_id`` is not the supported repository.
    """
    # Raise explicitly instead of using ``assert`` so the check is not
    # stripped when Python runs with ``-O``.
    if repo_id != "csukuangfj/vits-vctk":
        raise ValueError(f"Unsupported repo_id: {repo_id}")

    model = get_file(repo_id=repo_id, filename="vits-vctk.onnx", subfolder=".")
    lexicon = get_file(repo_id=repo_id, filename="lexicon.txt", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            provider="cpu",
            debug=True,
            num_threads=2,
        )
    )
    return sherpa_onnx.OfflineTts(tts_config)
58
+
59
+
60
@lru_cache(maxsize=10)
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for the ``csukuangfj/vits-ljs`` model.

    Fetches ``vits-ljs.onnx``, ``lexicon.txt`` and ``tokens.txt`` from the
    repository via ``get_file`` and wires them into a VITS configuration.
    Results are memoised by ``lru_cache`` per ``(repo_id, speed)`` pair.

    Args:
        repo_id: Must be exactly ``"csukuangfj/vits-ljs"``.
        speed: Divisor applied to the length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` instance configured for CPU inference.

    Raises:
        ValueError: If ``repo_id`` is not the supported repository.
    """
    # Raise explicitly instead of using ``assert`` so the check is not
    # stripped when Python runs with ``-O``.
    if repo_id != "csukuangfj/vits-ljs":
        raise ValueError(f"Unsupported repo_id: {repo_id}")

    model = get_file(repo_id=repo_id, filename="vits-ljs.onnx", subfolder=".")
    lexicon = get_file(repo_id=repo_id, filename="lexicon.txt", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            provider="cpu",
            debug=True,
            num_threads=2,
        )
    )
    return sherpa_onnx.OfflineTts(tts_config)
98
+
99
+
100
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build an offline TTS engine for coqui / MMS / piper / mimic3 repos.

    The ONNX file name is derived from ``repo_id``:

    * ids containing ``"coqui"`` or ``"vits-mms"`` use ``model.onnx``;
    * ids containing ``"piper"`` use the repository name with the
      ``"vits-piper-"`` prefix length sliced off;
    * ids containing ``"mimic3"`` use the repository name with the
      ``"vits-mimic3-"`` prefix length sliced off.

    No lexicon is used. ``data_dir`` is ``"/tmp/espeak-ng-data"`` except for
    ids containing ``"vits-coqui-uk-mai"`` or ``"vits-mms"``, which get an
    empty ``data_dir``. Results are memoised by ``lru_cache``.

    Args:
        repo_id: Hub repository id in ``owner/name`` form.
        speed: Divisor applied to the length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` instance configured for CPU inference.

    Raises:
        ValueError: If ``repo_id`` matches none of the recognised families.
    """
    if "coqui" in repo_id or "vits-mms" in repo_id:
        onnx_stem = "model"
    else:
        # (substring that selects the family, prefix whose length is sliced
        # off the repository name), checked in the same order as before.
        families = (("piper", "vits-piper-"), ("mimic3", "vits-mimic3-"))
        for marker, prefix in families:
            if marker in repo_id:
                onnx_stem = repo_id.split("/")[1][len(prefix):]
                break
        else:
            raise ValueError(f"Unsupported {repo_id}")

    without_data_dir = "vits-coqui-uk-mai" in repo_id or "vits-mms" in repo_id
    data_dir = "" if without_data_dir else "/tmp/espeak-ng-data"

    model_path = get_file(
        repo_id=repo_id, filename=f"{onnx_stem}.onnx", subfolder="."
    )
    tokens_path = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon="",
        data_dir=data_dir,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(sherpa_onnx.OfflineTtsConfig(model=model_config))
146
+
147
+
148
@lru_cache(maxsize=10)
def _get_vits_mms(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Return the engine ``_get_vits_piper`` builds for the same arguments.

    Args:
        repo_id: Hub repository id, forwarded unchanged.
        speed: Speed value, forwarded unchanged.

    Returns:
        The ``sherpa_onnx.OfflineTts`` produced by ``_get_vits_piper``.
    """
    # Positional call on purpose: ``lru_cache`` keys positional and keyword
    # invocations separately.
    engine = _get_vits_piper(repo_id, speed)
    return engine
151
+
152
+
153
@lru_cache(maxsize=10)
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the offline TTS engine for ``csukuangfj/vits-zh-aishell3``.

    Fetches ``vits-aishell3.onnx``, ``lexicon.txt``, ``tokens.txt`` and
    ``rule.fst`` via ``get_file``; the FST path is passed as ``rule_fsts``.
    Results are memoised by ``lru_cache`` per ``(repo_id, speed)`` pair.

    Args:
        repo_id: Must be exactly ``"csukuangfj/vits-zh-aishell3"``.
        speed: Divisor applied to the length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` instance configured for CPU inference.

    Raises:
        ValueError: If ``repo_id`` is not the supported repository.
    """
    # Raise explicitly instead of using ``assert`` so the check is not
    # stripped when Python runs with ``-O``.
    if repo_id != "csukuangfj/vits-zh-aishell3":
        raise ValueError(f"Unsupported repo_id: {repo_id}")

    model = get_file(repo_id=repo_id, filename="vits-aishell3.onnx", subfolder=".")
    lexicon = get_file(repo_id=repo_id, filename="lexicon.txt", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")
    rule_fst = get_file(repo_id=repo_id, filename="rule.fst", subfolder=".")

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        rule_fsts=rule_fst,
    )
    return sherpa_onnx.OfflineTts(tts_config)
198
+
199
+
200
@lru_cache(maxsize=10)
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build an offline TTS engine for a lexicon-plus-rule-FST VITS repo.

    The ONNX file stem is the part of ``repo_id`` after the last ``"/"`` when
    the id contains ``"fanchen"`` or ``"vits-cantonese-hf-xiaomaiiwn"``, and
    the part after the last ``"-"`` otherwise. The model, ``lexicon.txt``,
    ``tokens.txt`` and ``rule.fst`` are fetched via ``get_file``, in that
    order. Results are memoised by ``lru_cache``.

    Args:
        repo_id: Hub repository id.
        speed: Divisor applied to the length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` instance configured for CPU inference.
    """
    named_after_repo = (
        "fanchen" in repo_id or "vits-cantonese-hf-xiaomaiiwn" in repo_id
    )
    separator = "/" if named_after_repo else "-"
    onnx_stem = repo_id.split(separator)[-1]

    # The generator is consumed left to right, so the files are requested in
    # the listed order.
    wanted = (f"{onnx_stem}.onnx", "lexicon.txt", "tokens.txt", "rule.fst")
    model_path, lexicon_path, tokens_path, rule_fst_path = (
        get_file(repo_id=repo_id, filename=wanted_name, subfolder=".")
        for wanted_name in wanted
    )

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    return sherpa_onnx.OfflineTts(
        sherpa_onnx.OfflineTtsConfig(model=model_config, rule_fsts=rule_fst_path)
    )
248
+
249
+
250
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Look up and build the TTS engine registered for ``repo_id``.

    The per-language registries are searched in a fixed order (English,
    Arabic, Turkish, Persian, Hindi, Gujarati); the first one containing
    ``repo_id`` supplies the builder, which is called with
    ``(repo_id, speed)``. Results are memoised by ``lru_cache``.

    Args:
        repo_id: Hub repository id; must be a key of one of the registries.
        speed: Speed value forwarded to the builder. Must be greater than
            zero because the builders compute ``1.0 / speed``.

    Returns:
        The ``sherpa_onnx.OfflineTts`` instance produced by the builder.

    Raises:
        ValueError: If ``repo_id`` is in no registry, or if ``speed`` is not
            greater than zero.
    """
    registries = (
        english_models,
        arabic_models,
        turkish_models,
        persian_models,
        hindi_models,
        gujarati_models,
    )
    for registry in registries:
        if repo_id in registry:
            builder = registry[repo_id]
            break
    else:
        raise ValueError(f"Unsupported repo_id: {repo_id}")

    # Fail early with a clear message instead of a ZeroDivisionError (or a
    # negative / NaN length scale) inside a builder. ``not speed > 0`` also
    # rejects NaN.
    if not speed > 0:
        raise ValueError(f"speed must be greater than 0, got {speed}")

    return builder(repo_id, speed)
266
+
267
+
268
# Registries mapping a Hub repository id to the function that builds its TTS
# engine. Insertion order is kept because ``language_to_models`` exposes the
# keys as ordered lists.
english_models = {
    **dict.fromkeys(
        (
            "csukuangfj/vits-piper-en_US-glados",
            # coqui-ai
            "csukuangfj/vits-coqui-en-ljspeech",
            "csukuangfj/vits-coqui-en-ljspeech-neon",
            "csukuangfj/vits-coqui-en-vctk",
            # piper (this first entry is en_GB; the rest of the group is en_US)
            "csukuangfj/vits-piper-en_GB-sweetbbak-amy",
            "csukuangfj/vits-piper-en_US-amy-low",
            "csukuangfj/vits-piper-en_US-amy-medium",
            "csukuangfj/vits-piper-en_US-arctic-medium",  # 18 speakers
            "csukuangfj/vits-piper-en_US-danny-low",
            "csukuangfj/vits-piper-en_US-hfc_male-medium",
            "csukuangfj/vits-piper-en_US-joe-medium",
            "csukuangfj/vits-piper-en_US-kathleen-low",
            "csukuangfj/vits-piper-en_US-kusal-medium",
            "csukuangfj/vits-piper-en_US-l2arctic-medium",  # 24 speakers
            "csukuangfj/vits-piper-en_US-lessac-low",
            "csukuangfj/vits-piper-en_US-lessac-medium",
            "csukuangfj/vits-piper-en_US-lessac-high",
            "csukuangfj/vits-piper-en_US-libritts-high",  # 904 speakers
            "csukuangfj/vits-piper-en_US-libritts_r-medium",  # 904 speakers
            "csukuangfj/vits-piper-en_US-ryan-low",
            "csukuangfj/vits-piper-en_US-ryan-medium",
            "csukuangfj/vits-piper-en_US-ryan-high",
            # piper, GB
            "csukuangfj/vits-piper-en_GB-alan-low",
            "csukuangfj/vits-piper-en_GB-alan-medium",
            "csukuangfj/vits-piper-en_GB-alba-medium",
            "csukuangfj/vits-piper-en_GB-jenny_dioco-medium",
            "csukuangfj/vits-piper-en_GB-northern_english_male-medium",
            "csukuangfj/vits-piper-en_GB-semaine-medium",
            "csukuangfj/vits-piper-en_GB-southern_english_female-low",
            "csukuangfj/vits-piper-en_GB-vctk-medium",
        ),
        _get_vits_piper,
    ),
    # Models with their own dedicated builders.
    "csukuangfj/vits-vctk": _get_vits_vctk,  # 109 speakers
    "csukuangfj/vits-ljs": _get_vits_ljs,
}


arabic_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-ar_JO-kareem-low",
        "csukuangfj/vits-piper-ar_JO-kareem-medium",
    ),
    _get_vits_piper,
)

turkish_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-tr_TR-dfki-medium",
        "csukuangfj/vits-piper-tr_TR-fahrettin-medium",
    ),
    _get_vits_piper,
)


persian_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-fa_IR-amir-medium",
        "csukuangfj/vits-piper-fa_IR-gyro-medium",
        "csukuangfj/vits-mimic3-fa-haaniye_low",
    ),
    _get_vits_piper,
)

gujarati_models = dict.fromkeys(
    ("csukuangfj/vits-mimic3-gu_IN-cmu-indic_low",),
    _get_vits_piper,
)

# NOTE(review): this key contains none of the substrings _get_vits_piper
# recognises ("coqui", "vits-mms", "piper", "mimic3"), so the builder raises
# ValueError("Unsupported ...") for it. Confirm the intended repository id.
hindi_models = dict.fromkeys(
    ("vosk-model-hi-0.22",),
    _get_vits_piper,
)


# Display name of each language -> ordered list of the repo ids offered for it.
language_to_models = {
    "English": list(english_models),
    "Arabic": list(arabic_models),
    "Hindi": list(hindi_models),
    "Gujarati": list(gujarati_models),
    "Persian": list(persian_models),
    "Turkish": list(turkish_models),
}