Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -2,12 +2,28 @@ import gradio as gr
|
|
2 |
from TTS.api import TTS
|
3 |
from TTS.tts.configs.xtts_config import XttsConfig
|
4 |
from TTS.tts.models.xtts import Xtts
|
|
|
|
|
|
|
5 |
|
6 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
config = XttsConfig()
|
8 |
-
config.load_json(
|
9 |
model = Xtts.init_from_config(config)
|
10 |
-
model.load_checkpoint(config, checkpoint_path=
|
11 |
|
12 |
def sintetizar_voz(texto, idioma, audio_referencia, usar_microfono, audio_microfono):
|
13 |
if usar_microfono:
|
@@ -15,11 +31,22 @@ def sintetizar_voz(texto, idioma, audio_referencia, usar_microfono, audio_microf
|
|
15 |
else:
|
16 |
audio_entrada = audio_referencia
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
return
|
23 |
|
24 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
25 |
gr.Markdown("# Sintetizador de Voz XTTS")
|
@@ -27,7 +54,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
|
|
27 |
with gr.Row():
|
28 |
with gr.Column():
|
29 |
texto_entrada = gr.Textbox(label="Texto a sintetizar", placeholder="Escribe aquí el texto que quieres convertir a voz...")
|
30 |
-
idioma = gr.Dropdown(label="Idioma", choices=
|
31 |
audio_referencia = gr.Audio(label="Audio de referencia", type="filepath")
|
32 |
usar_microfono = gr.Checkbox(label="Usar micrófono")
|
33 |
audio_microfono = gr.Audio(label="Grabar con micrófono", source="microphone", type="filepath", visible=False)
|
|
|
2 |
from TTS.api import TTS
|
3 |
from TTS.tts.configs.xtts_config import XttsConfig
|
4 |
from TTS.tts.models.xtts import Xtts
|
5 |
+
from TTS.utils.generic_utils import get_user_data_dir
|
6 |
+
import os
|
7 |
+
from huggingface_hub import hf_hub_download
|
8 |
|
9 |
+
# Configuración de rutas y descarga del modelo
|
10 |
+
repo_id = "Blakus/Pedro_Lab_XTTS"
|
11 |
+
local_dir = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v2")
|
12 |
+
os.makedirs(local_dir, exist_ok=True)
|
13 |
+
|
14 |
+
files_to_download = ["config.json", "model.pth", "vocab.json"]
|
15 |
+
for file_name in files_to_download:
|
16 |
+
hf_hub_download(repo_id=repo_id, filename=file_name, local_dir=local_dir)
|
17 |
+
|
18 |
+
config_path = os.path.join(local_dir, "config.json")
|
19 |
+
checkpoint_path = os.path.join(local_dir, "model.pth")
|
20 |
+
vocab_path = os.path.join(local_dir, "vocab.json")
|
21 |
+
|
22 |
+
# Cargar el modelo XTTS
|
23 |
config = XttsConfig()
|
24 |
+
config.load_json(config_path)
|
25 |
model = Xtts.init_from_config(config)
|
26 |
+
model.load_checkpoint(config, checkpoint_path=checkpoint_path, vocab_path=vocab_path, eval=True, use_deepspeed=False)
|
27 |
|
28 |
def sintetizar_voz(texto, idioma, audio_referencia, usar_microfono, audio_microfono):
|
29 |
if usar_microfono:
|
|
|
31 |
else:
|
32 |
audio_entrada = audio_referencia
|
33 |
|
34 |
+
# Lógica de síntesis de voz usando el modelo XTTS
|
35 |
+
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=audio_entrada, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
|
36 |
+
out = model.inference(
|
37 |
+
texto,
|
38 |
+
language=idioma,
|
39 |
+
gpt_cond_latent=gpt_cond_latent,
|
40 |
+
speaker_embedding=speaker_embedding,
|
41 |
+
repetition_penalty=5.0,
|
42 |
+
temperature=0.75,
|
43 |
+
)
|
44 |
+
|
45 |
+
# Guardar el audio generado
|
46 |
+
output_path = "output.wav"
|
47 |
+
model.save_wav(wav=out["wav"], path=output_path)
|
48 |
|
49 |
+
return output_path, f"Tiempo de generación: {out['inference_time']:.2f} segundos"
|
50 |
|
51 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
52 |
gr.Markdown("# Sintetizador de Voz XTTS")
|
|
|
54 |
with gr.Row():
|
55 |
with gr.Column():
|
56 |
texto_entrada = gr.Textbox(label="Texto a sintetizar", placeholder="Escribe aquí el texto que quieres convertir a voz...")
|
57 |
+
idioma = gr.Dropdown(label="Idioma", choices=config.languages, value="es")
|
58 |
audio_referencia = gr.Audio(label="Audio de referencia", type="filepath")
|
59 |
usar_microfono = gr.Checkbox(label="Usar micrófono")
|
60 |
audio_microfono = gr.Audio(label="Grabar con micrófono", source="microphone", type="filepath", visible=False)
|