xJuuzouYTx commited on
Commit
eb3ba2e
1 Parent(s): 42cf67e

[ADD] edge tts

Browse files
Files changed (4) hide show
  1. app.py +87 -1
  2. requirements.txt +5 -2
  3. tts/constants.py +438 -0
  4. tts/conversion.py +138 -0
app.py CHANGED
@@ -6,6 +6,8 @@ import hashlib
6
  from utils.model import model_downloader, get_model
7
  import requests
8
  import json
 
 
9
 
10
  api_url = "https://rvc-models-api.onrender.com/uploadfile/"
11
 
@@ -67,6 +69,7 @@ def infer(model, f0_method, audio_file):
67
  else:
68
  return
69
 
 
70
  def post_model(name, model_url, version, creator):
71
  modelname = model_downloader(model_url, zips_folder, unzips_folder)
72
  model_files = get_model(unzips_folder, modelname)
@@ -116,6 +119,12 @@ def search_model(name):
116
  result.append(f"**Nombre del modelo: {model_name}**</br>{model_url}</br>")
117
  yield "</br>".join(result)
118
  cont += 1
 
 
 
 
 
 
119
 
120
  with gr.Blocks() as app:
121
  gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
@@ -134,6 +143,30 @@ with gr.Blocks() as app:
134
  btn = gr.Button(value="Convertir")
135
  btn.click(infer, inputs=[model_url, f0_method, audio_path], outputs=[vc_output1, vc_output2])
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  with gr.Tab("Recursos"):
138
  gr.HTML("<h4>Buscar modelos</h4>")
139
  search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
@@ -156,5 +189,58 @@ with gr.Blocks() as app:
156
 
157
  btn_post_model = gr.Button(value="Publicar")
158
  btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- app.queue(concurrency_count=511, max_size=1022).launch(share=True)
 
 
6
  from utils.model import model_downloader, get_model
7
  import requests
8
  import json
9
+ from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
10
+ from tts.conversion import tts_infer
11
 
12
  api_url = "https://rvc-models-api.onrender.com/uploadfile/"
13
 
 
69
  else:
70
  return
71
 
72
+
73
  def post_model(name, model_url, version, creator):
74
  modelname = model_downloader(model_url, zips_folder, unzips_folder)
75
  model_files = get_model(unzips_folder, modelname)
 
119
  result.append(f"**Nombre del modelo: {model_name}**</br>{model_url}</br>")
120
  yield "</br>".join(result)
121
  cont += 1
122
+
123
def update_tts_methods_voice(select_value):
    """Return a Gradio update carrying the voice list for the chosen TTS method.

    "Edge-tts" maps to EDGE_VOICES and "Bark-tts" maps to BARK_VOICES. Any
    other value yields None, exactly as falling off the end of the original
    if/elif chain did.
    """
    if select_value == "Edge-tts":
        voices = EDGE_VOICES
    elif select_value == "Bark-tts":
        voices = BARK_VOICES
    else:
        # Unknown method: no update object is produced.
        return None
    return gr.update(choices=voices)
128
 
129
  with gr.Blocks() as app:
130
  gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
 
143
  btn = gr.Button(value="Convertir")
144
  btn.click(infer, inputs=[model_url, f0_method, audio_path], outputs=[vc_output1, vc_output2])
145
 
146
+ with gr.TabItem("TTS"):
147
+ with gr.Row():
148
+ tts_text = gr.Textbox(
149
+ label="Texto:",
150
+ placeholder="Texto que deseas convertir a voz...",
151
+ lines=6,
152
+ )
153
+
154
+ with gr.Column():
155
+ with gr.Row():
156
+ tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
157
+
158
+ with gr.Column():
159
+ tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=False)
160
+ tts_model = gr.Dropdown(choices=EDGE_VOICES, label="Modelo TTS:", visible=True, interactive=True)
161
+ tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model])
162
+
163
+ with gr.Row():
164
+ tts_vc_output1 = gr.Textbox(label="Salida")
165
+ tts_vc_output2 = gr.Audio(label="Audio de salida")
166
+
167
+ tts_btn = gr.Button(value="Convertir")
168
+ tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model], outputs=[tts_vc_output1, tts_vc_output2])
169
+
170
  with gr.Tab("Recursos"):
171
  gr.HTML("<h4>Buscar modelos</h4>")
172
  search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
 
189
 
190
  btn_post_model = gr.Button(value="Publicar")
191
  btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
192
+
193
+
194
+ # with gr.Column():
195
+ # model_voice_path07 = gr.Dropdown(
196
+ # label=i18n("RVC Model:"),
197
+ # choices=sorted(names),
198
+ # value=default_weight,
199
+ # )
200
+ # best_match_index_path1, _ = match_index(
201
+ # model_voice_path07.value
202
+ # )
203
+
204
+ # file_index2_07 = gr.Dropdown(
205
+ # label=i18n("Select the .index file:"),
206
+ # choices=get_indexes(),
207
+ # value=best_match_index_path1,
208
+ # interactive=True,
209
+ # allow_custom_value=True,
210
+ # )
211
+ # with gr.Row():
212
+ # refresh_button_ = gr.Button(i18n("Refresh"), variant="primary")
213
+ # refresh_button_.click(
214
+ # fn=change_choices2,
215
+ # inputs=[],
216
+ # outputs=[model_voice_path07, file_index2_07],
217
+ # )
218
+ # with gr.Row():
219
+ # original_ttsvoice = gr.Audio(label=i18n("Audio TTS:"))
220
+ # ttsvoice = gr.Audio(label=i18n("Audio RVC:"))
221
+
222
+ # with gr.Row():
223
+ # button_test = gr.Button(i18n("Convert"), variant="primary")
224
+
225
+ # button_test.click(
226
+ # tts.use_tts,
227
+ # inputs=[
228
+ # text_test,
229
+ # tts_test,
230
+ # model_voice_path07,
231
+ # file_index2_07,
232
+ # # transpose_test,
233
+ # vc_transform0,
234
+ # f0method8,
235
+ # index_rate1,
236
+ # crepe_hop_length,
237
+ # f0_autotune,
238
+ # ttsmethod_test,
239
+ # ],
240
+ # outputs=[ttsvoice, original_ttsvoice],
241
+ # )
242
+
243
+
244
 
245
+ app.queue(concurrency_count=511, max_size=1022).launch()
246
+ #share=True
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  absl-py==1.4.0
2
  aiofiles==23.1.0
3
  aiohttp==3.8.3
@@ -44,7 +45,7 @@ fsspec==2022.11.0
44
  future==0.18.2
45
  google-auth==2.16.2
46
  google-auth-oauthlib==1.0.0
47
- gradio==3.34.0
48
  gradio_client==0.2.10
49
  grpcio==1.51.3
50
  gspread==5.10.0
@@ -166,5 +167,7 @@ yarl==1.8.2
166
  zipp==3.15.0
167
  firebase
168
  firebase_admin
 
169
  gdown
170
- validators
 
 
1
+ [email protected]:huggingface/transformers.git@main#egg=transformers
2
  absl-py==1.4.0
3
  aiofiles==23.1.0
4
  aiohttp==3.8.3
 
45
  future==0.18.2
46
  google-auth==2.16.2
47
  google-auth-oauthlib==1.0.0
48
+ gradio
49
  gradio_client==0.2.10
50
  grpcio==1.51.3
51
  gspread==5.10.0
 
167
  zipp==3.15.0
168
  firebase
169
  firebase_admin
170
+ nltk
171
  gdown
172
+ validators
173
+ git+https://github.com/suno-ai/bark.git
tts/constants.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ VOICE_METHODS = ["Edge-tts", "Bark-tts"]
2
+
3
+ BARK_VOICES = [
4
+ "v2/en_speaker_0-Male",
5
+ "v2/en_speaker_1-Male",
6
+ "v2/en_speaker_2-Male",
7
+ "v2/en_speaker_3-Male",
8
+ "v2/en_speaker_4-Male",
9
+ "v2/en_speaker_5-Male",
10
+ "v2/en_speaker_6-Male",
11
+ "v2/en_speaker_7-Male",
12
+ "v2/en_speaker_8-Male",
13
+ "v2/en_speaker_9-Female",
14
+ "v2/zh_speaker_0-Male",
15
+ "v2/zh_speaker_1-Male",
16
+ "v2/zh_speaker_2-Male",
17
+ "v2/zh_speaker_3-Male",
18
+ "v2/zh_speaker_4-Female",
19
+ "v2/zh_speaker_5-Male",
20
+ "v2/zh_speaker_6-Female",
21
+ "v2/zh_speaker_7-Female",
22
+ "v2/zh_speaker_8-Male",
23
+ "v2/zh_speaker_9-Female",
24
+ "v2/fr_speaker_0-Male",
25
+ "v2/fr_speaker_1-Female",
26
+ "v2/fr_speaker_2-Female",
27
+ "v2/fr_speaker_3-Male",
28
+ "v2/fr_speaker_4-Male",
29
+ "v2/fr_speaker_5-Female",
30
+ "v2/fr_speaker_6-Male",
31
+ "v2/fr_speaker_7-Male",
32
+ "v2/fr_speaker_8-Male",
33
+ "v2/fr_speaker_9-Male",
34
+ "v2/de_speaker_0-Male",
35
+ "v2/de_speaker_1-Male",
36
+ "v2/de_speaker_2-Male",
37
+ "v2/de_speaker_3-Female",
38
+ "v2/de_speaker_4-Male",
39
+ "v2/de_speaker_5-Male",
40
+ "v2/de_speaker_6-Male",
41
+ "v2/de_speaker_7-Male",
42
+ "v2/de_speaker_8-Female",
43
+ "v2/de_speaker_9-Male",
44
+ "v2/hi_speaker_0-Female",
45
+ "v2/hi_speaker_1-Female",
46
+ "v2/hi_speaker_2-Male",
47
+ "v2/hi_speaker_3-Female",
48
+ "v2/hi_speaker_4-Female",
49
+ "v2/hi_speaker_5-Male",
50
+ "v2/hi_speaker_6-Male",
51
+ "v2/hi_speaker_7-Male",
52
+ "v2/hi_speaker_8-Male",
53
+ "v2/hi_speaker_9-Female",
54
+ "v2/it_speaker_0-Male",
55
+ "v2/it_speaker_1-Male",
56
+ "v2/it_speaker_2-Female",
57
+ "v2/it_speaker_3-Male",
58
+ "v2/it_speaker_4-Male",
59
+ "v2/it_speaker_5-Male",
60
+ "v2/it_speaker_6-Male",
61
+ "v2/it_speaker_7-Female",
62
+ "v2/it_speaker_8-Male",
63
+ "v2/it_speaker_9-Female",
64
+ "v2/ja_speaker_0-Female",
65
+ "v2/ja_speaker_1-Female",
66
+ "v2/ja_speaker_2-Male",
67
+ "v2/ja_speaker_3-Female",
68
+ "v2/ja_speaker_4-Female",
69
+ "v2/ja_speaker_5-Female",
70
+ "v2/ja_speaker_6-Male",
71
+ "v2/ja_speaker_7-Female",
72
+ "v2/ja_speaker_8-Female",
73
+ "v2/ja_speaker_9-Female",
74
+ "v2/ko_speaker_0-Female",
75
+ "v2/ko_speaker_1-Male",
76
+ "v2/ko_speaker_2-Male",
77
+ "v2/ko_speaker_3-Male",
78
+ "v2/ko_speaker_4-Male",
79
+ "v2/ko_speaker_5-Male",
80
+ "v2/ko_speaker_6-Male",
81
+ "v2/ko_speaker_7-Male",
82
+ "v2/ko_speaker_8-Male",
83
+ "v2/ko_speaker_9-Male",
84
+ "v2/pl_speaker_0-Male",
85
+ "v2/pl_speaker_1-Male",
86
+ "v2/pl_speaker_2-Male",
87
+ "v2/pl_speaker_3-Male",
88
+ "v2/pl_speaker_4-Female",
89
+ "v2/pl_speaker_5-Male",
90
+ "v2/pl_speaker_6-Female",
91
+ "v2/pl_speaker_7-Male",
92
+ "v2/pl_speaker_8-Male",
93
+ "v2/pl_speaker_9-Female",
94
+ "v2/pt_speaker_0-Male",
95
+ "v2/pt_speaker_1-Male",
96
+ "v2/pt_speaker_2-Male",
97
+ "v2/pt_speaker_3-Male",
98
+ "v2/pt_speaker_4-Male",
99
+ "v2/pt_speaker_5-Male",
100
+ "v2/pt_speaker_6-Male",
101
+ "v2/pt_speaker_7-Male",
102
+ "v2/pt_speaker_8-Male",
103
+ "v2/pt_speaker_9-Male",
104
+ "v2/ru_speaker_0-Male",
105
+ "v2/ru_speaker_1-Male",
106
+ "v2/ru_speaker_2-Male",
107
+ "v2/ru_speaker_3-Male",
108
+ "v2/ru_speaker_4-Male",
109
+ "v2/ru_speaker_5-Female",
110
+ "v2/ru_speaker_6-Female",
111
+ "v2/ru_speaker_7-Male",
112
+ "v2/ru_speaker_8-Male",
113
+ "v2/ru_speaker_9-Female",
114
+ "v2/es_speaker_0-Male",
115
+ "v2/es_speaker_1-Male",
116
+ "v2/es_speaker_2-Male",
117
+ "v2/es_speaker_3-Male",
118
+ "v2/es_speaker_4-Male",
119
+ "v2/es_speaker_5-Male",
120
+ "v2/es_speaker_6-Male",
121
+ "v2/es_speaker_7-Male",
122
+ "v2/es_speaker_8-Female",
123
+ "v2/es_speaker_9-Female",
124
+ "v2/tr_speaker_0-Male",
125
+ "v2/tr_speaker_1-Male",
126
+ "v2/tr_speaker_2-Male",
127
+ "v2/tr_speaker_3-Male",
128
+ "v2/tr_speaker_4-Female",
129
+ "v2/tr_speaker_5-Female",
130
+ "v2/tr_speaker_6-Male",
131
+ "v2/tr_speaker_7-Male",
132
+ "v2/tr_speaker_8-Male",
133
+ "v2/tr_speaker_9-Male",
134
+ ]
135
+
136
+ EDGE_VOICES = [
137
+ "af-ZA-AdriNeural-Female",
138
+ "af-ZA-WillemNeural-Male",
139
+ "sq-AL-AnilaNeural-Female",
140
+ "sq-AL-IlirNeural-Male",
141
+ "am-ET-AmehaNeural-Male",
142
+ "am-ET-MekdesNeural-Female",
143
+ "ar-DZ-AminaNeural-Female",
144
+ "ar-DZ-IsmaelNeural-Male",
145
+ "ar-BH-AliNeural-Male",
146
+ "ar-BH-LailaNeural-Female",
147
+ "ar-EG-SalmaNeural-Female",
148
+ "ar-EG-ShakirNeural-Male",
149
+ "ar-IQ-BasselNeural-Male",
150
+ "ar-IQ-RanaNeural-Female",
151
+ "ar-JO-SanaNeural-Female",
152
+ "ar-JO-TaimNeural-Male",
153
+ "ar-KW-FahedNeural-Male",
154
+ "ar-KW-NouraNeural-Female",
155
+ "ar-LB-LaylaNeural-Female",
156
+ "ar-LB-RamiNeural-Male",
157
+ "ar-LY-ImanNeural-Female",
158
+ "ar-LY-OmarNeural-Male",
159
+ "ar-MA-JamalNeural-Male",
160
+ "ar-MA-MounaNeural-Female",
161
+ "ar-OM-AbdullahNeural-Male",
162
+ "ar-OM-AyshaNeural-Female",
163
+ "ar-QA-AmalNeural-Female",
164
+ "ar-QA-MoazNeural-Male",
165
+ "ar-SA-HamedNeural-Male",
166
+ "ar-SA-ZariyahNeural-Female",
167
+ "ar-SY-AmanyNeural-Female",
168
+ "ar-SY-LaithNeural-Male",
169
+ "ar-TN-HediNeural-Male",
170
+ "ar-TN-ReemNeural-Female",
171
+ "ar-AE-FatimaNeural-Female",
172
+ "ar-AE-HamdanNeural-Male",
173
+ "ar-YE-MaryamNeural-Female",
174
+ "ar-YE-SalehNeural-Male",
175
+ "az-AZ-BabekNeural-Male",
176
+ "az-AZ-BanuNeural-Female",
177
+ "bn-BD-NabanitaNeural-Female",
178
+ "bn-BD-PradeepNeural-Male",
179
+ "bn-IN-BashkarNeural-Male",
180
+ "bn-IN-TanishaaNeural-Female",
181
+ "bs-BA-GoranNeural-Male",
182
+ "bs-BA-VesnaNeural-Female",
183
+ "bg-BG-BorislavNeural-Male",
184
+ "bg-BG-KalinaNeural-Female",
185
+ "my-MM-NilarNeural-Female",
186
+ "my-MM-ThihaNeural-Male",
187
+ "ca-ES-EnricNeural-Male",
188
+ "ca-ES-JoanaNeural-Female",
189
+ "zh-HK-HiuGaaiNeural-Female",
190
+ "zh-HK-HiuMaanNeural-Female",
191
+ "zh-HK-WanLungNeural-Male",
192
+ "zh-CN-XiaoxiaoNeural-Female",
193
+ "zh-CN-XiaoyiNeural-Female",
194
+ "zh-CN-YunjianNeural-Male",
195
+ "zh-CN-YunxiNeural-Male",
196
+ "zh-CN-YunxiaNeural-Male",
197
+ "zh-CN-YunyangNeural-Male",
198
+ "zh-CN-liaoning-XiaobeiNeural-Female",
199
+ "zh-TW-HsiaoChenNeural-Female",
200
+ "zh-TW-YunJheNeural-Male",
201
+ "zh-TW-HsiaoYuNeural-Female",
202
+ "zh-CN-shaanxi-XiaoniNeural-Female",
203
+ "hr-HR-GabrijelaNeural-Female",
204
+ "hr-HR-SreckoNeural-Male",
205
+ "cs-CZ-AntoninNeural-Male",
206
+ "cs-CZ-VlastaNeural-Female",
207
+ "da-DK-ChristelNeural-Female",
208
+ "da-DK-JeppeNeural-Male",
209
+ "nl-BE-ArnaudNeural-Male",
210
+ "nl-BE-DenaNeural-Female",
211
+ "nl-NL-ColetteNeural-Female",
212
+ "nl-NL-FennaNeural-Female",
213
+ "nl-NL-MaartenNeural-Male",
214
+ "en-AU-NatashaNeural-Female",
215
+ "en-AU-WilliamNeural-Male",
216
+ "en-CA-ClaraNeural-Female",
217
+ "en-CA-LiamNeural-Male",
218
+ "en-HK-SamNeural-Male",
219
+ "en-HK-YanNeural-Female",
220
+ "en-IN-NeerjaExpressiveNeural-Female",
221
+ "en-IN-NeerjaNeural-Female",
222
+ "en-IN-PrabhatNeural-Male",
223
+ "en-IE-ConnorNeural-Male",
224
+ "en-IE-EmilyNeural-Female",
225
+ "en-KE-AsiliaNeural-Female",
226
+ "en-KE-ChilembaNeural-Male",
227
+ "en-NZ-MitchellNeural-Male",
228
+ "en-NZ-MollyNeural-Female",
229
+ "en-NG-AbeoNeural-Male",
230
+ "en-NG-EzinneNeural-Female",
231
+ "en-PH-JamesNeural-Male",
232
+ "en-PH-RosaNeural-Female",
233
+ "en-SG-LunaNeural-Female",
234
+ "en-SG-WayneNeural-Male",
235
+ "en-ZA-LeahNeural-Female",
236
+ "en-ZA-LukeNeural-Male",
237
+ "en-TZ-ElimuNeural-Male",
238
+ "en-TZ-ImaniNeural-Female",
239
+ "en-GB-LibbyNeural-Female",
240
+ "en-GB-MaisieNeural-Female",
241
+ "en-GB-RyanNeural-Male",
242
+ "en-GB-SoniaNeural-Female",
243
+ "en-GB-ThomasNeural-Male",
244
+ "en-US-AriaNeural-Female",
245
+ "en-US-AnaNeural-Female",
246
+ "en-US-ChristopherNeural-Male",
247
+ "en-US-EricNeural-Male",
248
+ "en-US-GuyNeural-Male",
249
+ "en-US-JennyNeural-Female",
250
+ "en-US-MichelleNeural-Female",
251
+ "en-US-RogerNeural-Male",
252
+ "en-US-SteffanNeural-Male",
253
+ "et-EE-AnuNeural-Female",
254
+ "et-EE-KertNeural-Male",
255
+ "fil-PH-AngeloNeural-Male",
256
+ "fil-PH-BlessicaNeural-Female",
257
+ "fi-FI-HarriNeural-Male",
258
+ "fi-FI-NooraNeural-Female",
259
+ "fr-BE-CharlineNeural-Female",
260
+ "fr-BE-GerardNeural-Male",
261
+ "fr-CA-AntoineNeural-Male",
262
+ "fr-CA-JeanNeural-Male",
263
+ "fr-CA-SylvieNeural-Female",
264
+ "fr-FR-DeniseNeural-Female",
265
+ "fr-FR-EloiseNeural-Female",
266
+ "fr-FR-HenriNeural-Male",
267
+ "fr-CH-ArianeNeural-Female",
268
+ "fr-CH-FabriceNeural-Male",
269
+ "gl-ES-RoiNeural-Male",
270
+ "gl-ES-SabelaNeural-Female",
271
+ "ka-GE-EkaNeural-Female",
272
+ "ka-GE-GiorgiNeural-Male",
273
+ "de-AT-IngridNeural-Female",
274
+ "de-AT-JonasNeural-Male",
275
+ "de-DE-AmalaNeural-Female",
276
+ "de-DE-ConradNeural-Male",
277
+ "de-DE-KatjaNeural-Female",
278
+ "de-DE-KillianNeural-Male",
279
+ "de-CH-JanNeural-Male",
280
+ "de-CH-LeniNeural-Female",
281
+ "el-GR-AthinaNeural-Female",
282
+ "el-GR-NestorasNeural-Male",
283
+ "gu-IN-DhwaniNeural-Female",
284
+ "gu-IN-NiranjanNeural-Male",
285
+ "he-IL-AvriNeural-Male",
286
+ "he-IL-HilaNeural-Female",
287
+ "hi-IN-MadhurNeural-Male",
288
+ "hi-IN-SwaraNeural-Female",
289
+ "hu-HU-NoemiNeural-Female",
290
+ "hu-HU-TamasNeural-Male",
291
+ "is-IS-GudrunNeural-Female",
292
+ "is-IS-GunnarNeural-Male",
293
+ "id-ID-ArdiNeural-Male",
294
+ "id-ID-GadisNeural-Female",
295
+ "ga-IE-ColmNeural-Male",
296
+ "ga-IE-OrlaNeural-Female",
297
+ "it-IT-DiegoNeural-Male",
298
+ "it-IT-ElsaNeural-Female",
299
+ "it-IT-IsabellaNeural-Female",
300
+ "ja-JP-KeitaNeural-Male",
301
+ "ja-JP-NanamiNeural-Female",
302
+ "jv-ID-DimasNeural-Male",
303
+ "jv-ID-SitiNeural-Female",
304
+ "kn-IN-GaganNeural-Male",
305
+ "kn-IN-SapnaNeural-Female",
306
+ "kk-KZ-AigulNeural-Female",
307
+ "kk-KZ-DauletNeural-Male",
308
+ "km-KH-PisethNeural-Male",
309
+ "km-KH-SreymomNeural-Female",
310
+ "ko-KR-InJoonNeural-Male",
311
+ "ko-KR-SunHiNeural-Female",
312
+ "lo-LA-ChanthavongNeural-Male",
313
+ "lo-LA-KeomanyNeural-Female",
314
+ "lv-LV-EveritaNeural-Female",
315
+ "lv-LV-NilsNeural-Male",
316
+ "lt-LT-LeonasNeural-Male",
317
+ "lt-LT-OnaNeural-Female",
318
+ "mk-MK-AleksandarNeural-Male",
319
+ "mk-MK-MarijaNeural-Female",
320
+ "ms-MY-OsmanNeural-Male",
321
+ "ms-MY-YasminNeural-Female",
322
+ "ml-IN-MidhunNeural-Male",
323
+ "ml-IN-SobhanaNeural-Female",
324
+ "mt-MT-GraceNeural-Female",
325
+ "mt-MT-JosephNeural-Male",
326
+ "mr-IN-AarohiNeural-Female",
327
+ "mr-IN-ManoharNeural-Male",
328
+ "mn-MN-BataaNeural-Male",
329
+ "mn-MN-YesuiNeural-Female",
330
+ "ne-NP-HemkalaNeural-Female",
331
+ "ne-NP-SagarNeural-Male",
332
+ "nb-NO-FinnNeural-Male",
333
+ "nb-NO-PernilleNeural-Female",
334
+ "ps-AF-GulNawazNeural-Male",
335
+ "ps-AF-LatifaNeural-Female",
336
+ "fa-IR-DilaraNeural-Female",
337
+ "fa-IR-FaridNeural-Male",
338
+ "pl-PL-MarekNeural-Male",
339
+ "pl-PL-ZofiaNeural-Female",
340
+ "pt-BR-AntonioNeural-Male",
341
+ "pt-BR-FranciscaNeural-Female",
342
+ "pt-PT-DuarteNeural-Male",
343
+ "pt-PT-RaquelNeural-Female",
344
+ "ro-RO-AlinaNeural-Female",
345
+ "ro-RO-EmilNeural-Male",
346
+ "ru-RU-DmitryNeural-Male",
347
+ "ru-RU-SvetlanaNeural-Female",
348
+ "sr-RS-NicholasNeural-Male",
349
+ "sr-RS-SophieNeural-Female",
350
+ "si-LK-SameeraNeural-Male",
351
+ "si-LK-ThiliniNeural-Female",
352
+ "sk-SK-LukasNeural-Male",
353
+ "sk-SK-ViktoriaNeural-Female",
354
+ "sl-SI-PetraNeural-Female",
355
+ "sl-SI-RokNeural-Male",
356
+ "so-SO-MuuseNeural-Male",
357
+ "so-SO-UbaxNeural-Female",
358
+ "es-AR-ElenaNeural-Female",
359
+ "es-AR-TomasNeural-Male",
360
+ "es-BO-MarceloNeural-Male",
361
+ "es-BO-SofiaNeural-Female",
362
+ "es-CL-CatalinaNeural-Female",
363
+ "es-CL-LorenzoNeural-Male",
364
+ "es-CO-GonzaloNeural-Male",
365
+ "es-CO-SalomeNeural-Female",
366
+ "es-CR-JuanNeural-Male",
367
+ "es-CR-MariaNeural-Female",
368
+ "es-CU-BelkysNeural-Female",
369
+ "es-CU-ManuelNeural-Male",
370
+ "es-DO-EmilioNeural-Male",
371
+ "es-DO-RamonaNeural-Female",
372
+ "es-EC-AndreaNeural-Female",
373
+ "es-EC-LuisNeural-Male",
374
+ "es-SV-LorenaNeural-Female",
375
+ "es-SV-RodrigoNeural-Male",
376
+ "es-GQ-JavierNeural-Male",
377
+ "es-GQ-TeresaNeural-Female",
378
+ "es-GT-AndresNeural-Male",
379
+ "es-GT-MartaNeural-Female",
380
+ "es-HN-CarlosNeural-Male",
381
+ "es-HN-KarlaNeural-Female",
382
+ "es-MX-DaliaNeural-Female",
383
+ "es-MX-JorgeNeural-Male",
384
+ "es-NI-FedericoNeural-Male",
385
+ "es-NI-YolandaNeural-Female",
386
+ "es-PA-MargaritaNeural-Female",
387
+ "es-PA-RobertoNeural-Male",
388
+ "es-PY-MarioNeural-Male",
389
+ "es-PY-TaniaNeural-Female",
390
+ "es-PE-AlexNeural-Male",
391
+ "es-PE-CamilaNeural-Female",
392
+ "es-PR-KarinaNeural-Female",
393
+ "es-PR-VictorNeural-Male",
394
+ "es-ES-AlvaroNeural-Male",
395
+ "es-ES-ElviraNeural-Female",
396
+ "es-US-AlonsoNeural-Male",
397
+ "es-US-PalomaNeural-Female",
398
+ "es-UY-MateoNeural-Male",
399
+ "es-UY-ValentinaNeural-Female",
400
+ "es-VE-PaolaNeural-Female",
401
+ "es-VE-SebastianNeural-Male",
402
+ "su-ID-JajangNeural-Male",
403
+ "su-ID-TutiNeural-Female",
404
+ "sw-KE-RafikiNeural-Male",
405
+ "sw-KE-ZuriNeural-Female",
406
+ "sw-TZ-DaudiNeural-Male",
407
+ "sw-TZ-RehemaNeural-Female",
408
+ "sv-SE-MattiasNeural-Male",
409
+ "sv-SE-SofieNeural-Female",
410
+ "ta-IN-PallaviNeural-Female",
411
+ "ta-IN-ValluvarNeural-Male",
412
+ "ta-MY-KaniNeural-Female",
413
+ "ta-MY-SuryaNeural-Male",
414
+ "ta-SG-AnbuNeural-Male",
415
+ "ta-SG-VenbaNeural-Female",
416
+ "ta-LK-KumarNeural-Male",
417
+ "ta-LK-SaranyaNeural-Female",
418
+ "te-IN-MohanNeural-Male",
419
+ "te-IN-ShrutiNeural-Female",
420
+ "th-TH-NiwatNeural-Male",
421
+ "th-TH-PremwadeeNeural-Female",
422
+ "tr-TR-AhmetNeural-Male",
423
+ "tr-TR-EmelNeural-Female",
424
+ "uk-UA-OstapNeural-Male",
425
+ "uk-UA-PolinaNeural-Female",
426
+ "ur-IN-GulNeural-Female",
427
+ "ur-IN-SalmanNeural-Male",
428
+ "ur-PK-AsadNeural-Male",
429
+ "ur-PK-UzmaNeural-Female",
430
+ "uz-UZ-MadinaNeural-Female",
431
+ "uz-UZ-SardorNeural-Male",
432
+ "vi-VN-HoaiMyNeural-Female",
433
+ "vi-VN-NamMinhNeural-Male",
434
+ "cy-GB-AledNeural-Male",
435
+ "cy-GB-NiaNeural-Female",
436
+ "zu-ZA-ThandoNeural-Female",
437
+ "zu-ZA-ThembaNeural-Male",
438
+ ]
tts/conversion.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import numpy as np
4
+ import torch
5
+ import soundfile as sf
6
+ from gtts import gTTS
7
+ import edge_tts
8
+ from inference import Inference
9
+ import asyncio
10
+ #git+https://github.com/suno-ai/bark.git
11
+ # from transformers import AutoProcessor, BarkModel
12
+ # import nltk
13
+ # from nltk.tokenize import sent_tokenize
14
+ # from bark import SAMPLE_RATE
15
+
16
+ # now_dir = os.getcwd()
17
+
18
def cast_to_device(tensor, device):
    """Call ``tensor.to(device)`` and return its result, best-effort.

    If the call raises for any reason, the exception is printed and the
    original ``tensor`` object is returned unchanged.
    """
    try:
        moved = tensor.to(device)
    except Exception as err:
        # Best-effort: report the problem and fall back to the input as-is.
        print(err)
        moved = tensor
    return moved
24
+
25
+ # Buscar la forma de evitar descargar el archivo de 4gb cada vez que crea una instancia
26
+ # def _bark_conversion_(text, voice_preset):
27
+ # os.makedirs(os.path.join(now_dir, "tts"), exist_ok=True)
28
+
29
+ # device = "cuda:0" if torch.cuda.is_available() else "cpu"
30
+ # dtype = torch.float32 if "cpu" in device else torch.float16
31
+ # bark_processor = AutoProcessor.from_pretrained(
32
+ # "suno/bark",
33
+ # cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
34
+ # torch_dtype=dtype,
35
+ # )
36
+ # bark_model = BarkModel.from_pretrained(
37
+ # "suno/bark",
38
+ # cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
39
+ # torch_dtype=dtype,
40
+ # ).to(device)
41
+ # # bark_model.enable_cpu_offload()
42
+ # inputs = bark_processor(text=[text], return_tensors="pt", voice_preset=voice_preset)
43
+ # tensor_dict = {
44
+ # k: cast_to_device(v, device) if hasattr(v, "to") else v
45
+ # for k, v in inputs.items()
46
+ # }
47
+ # speech_values = bark_model.generate(**tensor_dict, do_sample=True)
48
+ # sampling_rate = bark_model.generation_config.sample_rate
49
+ # speech = speech_values.cpu().numpy().squeeze()
50
+ # return speech, sampling_rate
51
+
52
+
53
def _synthesize_with_fallback(tts_text, tts_model, output_path):
    """Write speech for ``tts_text`` to ``output_path``; return True on success.

    edge-tts is tried first. If it fails for any reason, gTTS is tried with
    the language code taken from the leading field of ``tts_model``.
    """
    # Voice labels carry a trailing "-Female"/"-Male" suffix (for example
    # "es-MX-DaliaNeural-Female"); everything before the last dash is passed
    # to edge-tts as the voice name.
    edge_voice = "-".join(tts_model.split("-")[:-1])
    try:
        asyncio.run(edge_tts.Communicate(tts_text, edge_voice).save(output_path))
        return True
    except Exception as e:
        print("ERROR", e)

    # Use the whole first field so three-letter codes such as "fil" are not
    # truncated into a different language ("fi").
    language = tts_model.split("-")[0]
    try:
        gTTS(tts_text, lang=language).save(output_path)
    except Exception as e:
        print("ERROR", e)
        return False
    print(
        f"No audio was received. Please change the tts voice for {tts_model}. USING gTTS."
    )
    return True


def tts_infer(tts_text, model_url, tts_method, tts_model):
    """Synthesize ``tts_text`` to speech and convert it with an RVC model.

    Args:
        tts_text: Text to speak. Only the first 60 characters are used.
        model_url: Value passed to ``Inference`` as ``model_name``.
        tts_method: TTS backend name; only "Edge-tts" is handled here.
        tts_model: Voice label such as "es-MX-DaliaNeural-Female".

    Returns:
        A ``(message_or_output, audio_path_or_None)`` tuple. On validation or
        synthesis failure the first item is a user-facing message and the
        second is None. Otherwise the first item is whatever
        ``Inference.run()`` returned, and the second is ``output['file']``
        when ``output['success']`` is truthy, else None.
    """
    print("*****************")
    print(tts_text)
    print(model_url)
    # Whitespace-only text cannot be synthesized, so treat it as empty.
    if not tts_text or not tts_text.strip():
        return 'Primero escribe el texto que quieres convertir.', None
    if not tts_model:
        return 'Selecciona un modelo TTS antes de convertir.', None
    if not model_url:
        return 'Escribe la url de modelo que quieres usar antes de convertir.', None

    f0_method = "harvest"
    output_folder = "audios"
    os.makedirs(output_folder, exist_ok=True)
    converted_tts_filename = os.path.join(output_folder, f"tts_out_{uuid.uuid4()}.wav")

    if len(tts_text) > 60:
        tts_text = tts_text[:60]
        print("DEMO; limit to 60 characters")

    success = False
    if tts_method == "Edge-tts":
        success = _synthesize_with_fallback(tts_text, tts_model, converted_tts_filename)

    if not success:
        # Do not leave a partially written TTS file behind.
        if os.path.exists(converted_tts_filename):
            os.remove(converted_tts_filename)
        return "Ocurrió un error durante la conversión", None

    inference = Inference(
        model_name=model_url,
        f0_method=f0_method,
        source_audio_path=converted_tts_filename,
        output_file_name=os.path.join("./audio-outputs", os.path.basename(converted_tts_filename)),
    )
    try:
        output = inference.run()
    finally:
        # Clean up the intermediate TTS audio and the model weights file
        # even when the conversion raises.
        if os.path.exists(converted_tts_filename):
            os.remove(converted_tts_filename)

        weights_path = os.path.join("weights", inference.model_name)
        if os.path.exists(weights_path):
            os.remove(weights_path)

    if 'success' in output and output['success']:
        return output, output['file']
    return output, None
136
+
137
+
138
+