Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import uuid
|
|
7 |
import time
|
8 |
import torch
|
9 |
import torchaudio
|
10 |
-
|
11 |
# Mantenemos la descarga de MeCab
|
12 |
os.system('python -m unidic download')
|
13 |
|
@@ -76,12 +76,12 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic):
|
|
76 |
if len(prompt) < 2 or len(prompt) > 200:
|
77 |
return None, None, "El texto debe tener entre 2 y 200 caracteres."
|
78 |
|
79 |
-
# Usamos los valores de la configuración
|
80 |
-
temperature = config
|
81 |
-
repetition_penalty = config
|
82 |
-
gpt_cond_len = config
|
83 |
-
gpt_cond_chunk_len = config
|
84 |
-
max_ref_length = config
|
85 |
|
86 |
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
|
87 |
audio_path=speaker_wav,
|
@@ -90,6 +90,8 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic):
|
|
90 |
max_ref_length=max_ref_length
|
91 |
)
|
92 |
|
|
|
|
|
93 |
out = model.inference(
|
94 |
prompt,
|
95 |
language,
|
@@ -98,11 +100,16 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic):
|
|
98 |
temperature=temperature,
|
99 |
repetition_penalty=repetition_penalty,
|
100 |
)
|
|
|
101 |
|
102 |
torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
|
103 |
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
106 |
|
107 |
return gr.make_waveform("output.wav"), "output.wav", metrics_text
|
108 |
|
@@ -110,6 +117,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic):
|
|
110 |
print(f"Error detallado: {str(e)}")
|
111 |
return None, None, f"Error: {str(e)}"
|
112 |
|
|
|
113 |
# Interfaz de Gradio actualizada sin sliders
|
114 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
115 |
gr.Markdown("# Sintetizador de Voz XTTS")
|
|
|
7 |
import time
|
8 |
import torch
|
9 |
import torchaudio
|
10 |
+
import time
|
11 |
# Mantenemos la descarga de MeCab
|
12 |
os.system('python -m unidic download')
|
13 |
|
|
|
76 |
if len(prompt) < 2 or len(prompt) > 200:
|
77 |
return None, None, "El texto debe tener entre 2 y 200 caracteres."
|
78 |
|
79 |
+
# Usamos los valores de la configuración directamente
|
80 |
+
temperature = getattr(config, "temperature", 0.75)
|
81 |
+
repetition_penalty = getattr(config, "repetition_penalty", 5.0)
|
82 |
+
gpt_cond_len = getattr(config, "gpt_cond_len", 30)
|
83 |
+
gpt_cond_chunk_len = getattr(config, "gpt_cond_chunk_len", 4)
|
84 |
+
max_ref_length = getattr(config, "max_ref_len", 60)
|
85 |
|
86 |
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
|
87 |
audio_path=speaker_wav,
|
|
|
90 |
max_ref_length=max_ref_length
|
91 |
)
|
92 |
|
93 |
+
# Medimos el tiempo de inferencia manualmente
|
94 |
+
start_time = time.time()
|
95 |
out = model.inference(
|
96 |
prompt,
|
97 |
language,
|
|
|
100 |
temperature=temperature,
|
101 |
repetition_penalty=repetition_penalty,
|
102 |
)
|
103 |
+
inference_time = time.time() - start_time
|
104 |
|
105 |
torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
|
106 |
|
107 |
+
# Calculamos las métricas usando el tiempo medido manualmente
|
108 |
+
audio_length = len(out["wav"]) / 24000 # duración del audio en segundos
|
109 |
+
real_time_factor = inference_time / audio_length
|
110 |
+
|
111 |
+
metrics_text = f"Tiempo de generación: {inference_time:.2f} segundos\n"
|
112 |
+
metrics_text += f"Factor de tiempo real: {real_time_factor:.2f}"
|
113 |
|
114 |
return gr.make_waveform("output.wav"), "output.wav", metrics_text
|
115 |
|
|
|
117 |
print(f"Error detallado: {str(e)}")
|
118 |
return None, None, f"Error: {str(e)}"
|
119 |
|
120 |
+
|
121 |
# Interfaz de Gradio actualizada sin sliders
|
122 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
123 |
gr.Markdown("# Sintetizador de Voz XTTS")
|