Blakus committed
Commit dab9ee4 · verified · 1 Parent(s): 2628df2

Update app.py

Files changed (1)
  1. app.py +3 -140
app.py CHANGED

@@ -31,7 +31,8 @@ def setup_mecab_and_unidic():
     os.environ['MECABRC'] = os.path.join(mecab_dic_dir, 'mecabrc')
     print(f"MECABRC configurado en: {os.environ['MECABRC']}")
 
-    # Intentar descargar UniDic si es necesario
+    # Ejecutar explícitamente python -m unidic download
+    print("Descargando UniDic...")
     subprocess.check_call([sys.executable, '-m', 'unidic', 'download'])
     print("UniDic descargado correctamente")
 
@@ -48,142 +49,4 @@ def setup_mecab_and_unidic():
 print("Configurando MeCab y UniDic...")
 setup_mecab_and_unidic()
 
-# Descargar y configurar el modelo
-print("Descargando y configurando el modelo...")
-repo_id = "Blakus/Pedro_Lab_XTTS"
-local_dir = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v2")
-os.makedirs(local_dir, exist_ok=True)
-files_to_download = ["config.json", "model.pth", "vocab.json"]
-
-for file_name in files_to_download:
-    print(f"Descargando {file_name} de {repo_id}")
-    hf_hub_download(repo_id=repo_id, filename=file_name, local_dir=local_dir)
-
-config_path = os.path.join(local_dir, "config.json")
-checkpoint_path = os.path.join(local_dir, "model.pth")
-vocab_path = os.path.join(local_dir, "vocab.json")
-
-config = XttsConfig()
-config.load_json(config_path)
-
-model = Xtts.init_from_config(config)
-model.load_checkpoint(config, checkpoint_path=checkpoint_path, vocab_path=vocab_path, eval=True, use_deepspeed=False)
-
-print("Modelo cargado en CPU")
-
-# Funciones auxiliares
-def split_text(text):
-    return re.split(r'(?<=[.!?])\s+', text)
-
-def predict(prompt, language, reference_audio):
-    try:
-        if len(prompt) < 2 or len(prompt) > 600:
-            return None, "El texto debe tener entre 2 y 600 caracteres."
-
-        sentences = split_text(prompt)
-
-        temperature = config.inference.get("temperature", 0.75)
-        repetition_penalty = config.inference.get("repetition_penalty", 5.0)
-        gpt_cond_len = config.inference.get("gpt_cond_len", 30)
-        gpt_cond_chunk_len = config.inference.get("gpt_cond_chunk_len", 4)
-        max_ref_length = config.inference.get("max_ref_length", 60)
-
-        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
-            audio_path=reference_audio,
-            gpt_cond_len=gpt_cond_len,
-            gpt_cond_chunk_len=gpt_cond_chunk_len,
-            max_ref_length=max_ref_length
-        )
-
-        start_time = time.time()
-        combined_audio = AudioSegment.empty()
-
-        for sentence in sentences:
-            out = model.inference(
-                sentence,
-                language,
-                gpt_cond_latent,
-                speaker_embedding,
-                temperature=temperature,
-                repetition_penalty=repetition_penalty,
-            )
-            audio_segment = AudioSegment(
-                out["wav"].tobytes(),
-                frame_rate=24000,
-                sample_width=2,
-                channels=1
-            )
-            combined_audio += audio_segment
-            combined_audio += AudioSegment.silent(duration=500)  # 0.5 segundos de silencio
-
-        inference_time = time.time() - start_time
-
-        output_path = "output.wav"
-        combined_audio.export(output_path, format="wav")
-
-        audio_length = len(combined_audio) / 1000  # duración del audio en segundos
-        real_time_factor = inference_time / audio_length
-
-        metrics_text = f"Tiempo de generación: {inference_time:.2f} segundos\n"
-        metrics_text += f"Factor de tiempo real: {real_time_factor:.2f}"
-
-        return output_path, metrics_text
-
-    except Exception as e:
-        print(f"Error detallado: {str(e)}")
-        return None, f"Error: {str(e)}"
-
-# Configuración de la interfaz de Gradio
-supported_languages = ["es", "en"]
-reference_audios = [
-    "serio.wav",
-    "neutral.wav",
-    "alegre.wav",
-]
-
-theme = gr.themes.Soft(
-    primary_hue="blue",
-    secondary_hue="gray",
-).set(
-    body_background_fill='*neutral_100',
-    body_background_fill_dark='*neutral_900',
-)
-
-description = """
-# Sintetizador de voz de Pedro Labattaglia 🎙️
-
-Sintetizador de voz con la voz del locutor argentino Pedro Labattaglia.
-
-## Cómo usarlo:
-- Elija el idioma (Español o Inglés)
-- Elija un audio de referencia de la lista
-- Escriba el texto que desea sintetizar
-- Presione generar voz
-"""
-
-# Interfaz de Gradio
-with gr.Blocks(theme=theme) as demo:
-    gr.Markdown(description)
-
-    with gr.Row():
-        gr.Image("https://i1.sndcdn.com/artworks-000237574740-gwz61j-t500x500.jpg", label="", show_label=False, width=250, height=250)
-
-    with gr.Row():
-        with gr.Column(scale=2):
-            language_selector = gr.Dropdown(label="Idioma", choices=supported_languages)
-            reference_audio = gr.Dropdown(label="Audio de referencia", choices=reference_audios)
-            input_text = gr.Textbox(label="Texto a sintetizar", placeholder="Escribe aquí el texto que quieres convertir a voz...")
-            generate_button = gr.Button("Generar voz", variant="primary")
-
-        with gr.Column(scale=1):
-            generated_audio = gr.Audio(label="Audio generado", interactive=False)
-            metrics_output = gr.Textbox(label="Métricas", value="Tiempo de generación: -- segundos\nFactor de tiempo real: --")
-
-    generate_button.click(
-        predict,
-        inputs=[input_text, language_selector, reference_audio],
-        outputs=[generated_audio, metrics_output]
-    )
-
-if __name__ == "__main__":
-    demo.launch()
+# El resto del código permanece igual...
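
For reference, a minimal sketch of what the UniDic setup step reads like after this change, assembled only from the context and `+` lines in the diff above. It assumes `os`, `sys`, and `subprocess` are imported at the top of app.py, and `mecab_dic_dir` (computed earlier in `setup_mecab_and_unidic`, not shown in this hunk) is replaced here by a hypothetical placeholder.

```python
import os
import subprocess
import sys

def setup_mecab_and_unidic():
    # Placeholder: in app.py this path is resolved earlier in the function.
    mecab_dic_dir = "/path/to/mecab/dic"

    # Point MeCab at its configuration file.
    os.environ['MECABRC'] = os.path.join(mecab_dic_dir, 'mecabrc')
    print(f"MECABRC configurado en: {os.environ['MECABRC']}")

    # Run `python -m unidic download` explicitly so the UniDic dictionary
    # is present before any TTS code tries to use MeCab.
    print("Descargando UniDic...")
    subprocess.check_call([sys.executable, '-m', 'unidic', 'download'])
    print("UniDic descargado correctamente")
```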