Update app.py
app.py
CHANGED
@@ -13,17 +13,21 @@ import pysrt
 from tqdm import tqdm
 import shutil
 
-
+# Controla se a pasta temporária srt_temp será apagada após gerar o áudio
+srt_temp_deleta = True  # True apaga, False mantém
 
+# Load voices from JSON file
 def load_voices():
     with open('voices.json', 'r', encoding='utf-8') as f:
         return json.load(f)
 
+# Get formatted voice options for specific language
 def get_voice_options(language, voices_data):
     if language in voices_data:
         return [f"{voice['name']} | {voice['gender']}" for voice in voices_data[language]]
     return []
 
+# Extract voice name from formatted string
 def extract_voice_name(formatted_voice):
     return formatted_voice.split(" | ")[0]
 
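For reference, get_voice_options only assumes that voices.json maps a language name to a list of objects with "name" and "gender" keys. A hypothetical excerpt and call, for illustration only (the language key and voice entries below are not taken from the commit):

# voices.json (hypothetical excerpt):
# {"Português (Brasil)": [
#     {"name": "pt-BR-AntonioNeural", "gender": "Male"},
#     {"name": "pt-BR-FranciscaNeural", "gender": "Female"}]}
voices_data = load_voices()
options = get_voice_options("Português (Brasil)", voices_data)
# options == ["pt-BR-AntonioNeural | Male", "pt-BR-FranciscaNeural | Female"]
extract_voice_name(options[0])  # "pt-BR-AntonioNeural"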
@@ -142,6 +146,7 @@ def controlador_generate_audio_from_file(file, voice_model_input, speed_input, p
 
     return audio_file
 
+# Funções adaptadas do TTS.py para processar SRT com ajuste de velocidade
 def timetoms(time_obj):
     return time_obj.hours * 3600000 + time_obj.minutes * 60000 + time_obj.seconds * 1000 + time_obj.milliseconds
 
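timetoms simply flattens a pysrt timestamp into milliseconds; for example, 00:01:02,500 works out to 62500 ms:

import pysrt
t = pysrt.SubRipTime(hours=0, minutes=1, seconds=2, milliseconds=500)
timetoms(t)  # 0*3600000 + 1*60000 + 2*1000 + 500 = 62500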
@@ -162,7 +167,7 @@ async def merge_audio_files(output_folder, srt_file):
 
         if audio_file.exists():
             audio = AudioSegment.from_mp3(str(audio_file))
-            audio_segment = audio
+            audio_segment = audio  # Já ajustado anteriormente
         else:
            print(f"\nArquivo de áudio não encontrado: {audio_file}")
            audio_segment = AudioSegment.silent(duration=end_time - start_time)
@@ -183,7 +188,9 @@ async def merge_audio_files(output_folder, srt_file):
 
     final_audio += AudioSegment.silent(duration=additional_silence_duration)
 
-
+    srt_output_dir = Path("output/srt_output")
+    srt_output_dir.mkdir(parents=True, exist_ok=True)
+    output_file = srt_output_dir / f"{base_name}_final.mp3"
     final_audio.export(str(output_file), format="mp3")
     print(f"\nÁudio final salvo em: {output_file}\n")
     return str(output_file)
@@ -196,10 +203,13 @@ async def adjust_audio_speed(input_file, output_file, target_duration_ms):
         print(f"Erro: Áudio em {input_file} tem duração zero.")
         return audio
 
-
+    speed_factor = original_duration_ms / target_duration_ms
+    print(f"Fator de velocidade calculado: {speed_factor:.2f}x (original: {original_duration_ms}ms, alvo: {target_duration_ms}ms)")
 
-
+    # Ajustar a velocidade usando pydub
+    adjusted_audio = audio.speedup(playback_speed=speed_factor) if speed_factor > 1 else audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * speed_factor)})
 
+    # Garantir que a duração ajustada seja próxima do alvo
     if len(adjusted_audio) > target_duration_ms:
         adjusted_audio = adjusted_audio[:target_duration_ms]
     elif len(adjusted_audio) < target_duration_ms:
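The adjustment takes two different pydub paths: clips that run longer than the subtitle window are compressed with AudioSegment.speedup, while clips that run short are stretched by respawning the raw samples at a lower frame rate (which also lowers the pitch); a final trim or pad then lands the result exactly on the target duration. A minimal standalone sketch of the same idea, assuming only pydub (the file path and helper name are placeholders, not part of the commit):

from pydub import AudioSegment

def fit_to_duration(path, target_ms):
    audio = AudioSegment.from_mp3(path)   # len(audio) is the duration in ms
    factor = len(audio) / target_ms
    if factor > 1:
        # Too long: speed playback up
        adjusted = audio.speedup(playback_speed=factor)
    else:
        # Too short: reuse the same samples at a lower frame rate (slower, lower pitch)
        adjusted = audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * factor)})
    # Snap exactly to the subtitle window
    if len(adjusted) > target_ms:
        adjusted = adjusted[:target_ms]
    elif len(adjusted) < target_ms:
        adjusted += AudioSegment.silent(duration=target_ms - len(adjusted))
    return adjusted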
@@ -230,12 +240,14 @@ async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
             target_duration_ms = timetoms(sub.end) - timetoms(sub.start)
 
             if not output_file.exists() or output_file.stat().st_size == 0:
+                # Gerar áudio inicial sem ajuste de velocidade
                 tts = EdgeTTS(text=sub.text, voice=voice, pitch=pitch_str, volume=volume_str)
                 tasks.append(tts.save(str(temp_file)))
 
         if tasks:
             await asyncio.gather(*tasks)
 
+        # Ajustar velocidade para cada áudio gerado no batch
         for i in batch:
             sub = subs[i]
             temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
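Each batch runs in two passes: the EdgeTTS saves are awaited concurrently with asyncio.gather, and only afterwards is each clip fitted to its subtitle window. The pattern in isolation, with a dummy coroutine standing in for the TTS call (illustrative only):

import asyncio

async def fake_tts_save(path):
    await asyncio.sleep(0.1)   # stand-in for EdgeTTS(...).save(path)
    return path

async def run_batch(paths):
    # Pass 1: synthesize every clip in the batch concurrently
    await asyncio.gather(*(fake_tts_save(p) for p in paths))
    # Pass 2: post-process sequentially (speed adjustment in the real code)
    for p in paths:
        print(f"adjusting {p}")

asyncio.run(run_batch(["01_temp.mp3", "02_temp.mp3"]))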
@@ -244,11 +256,12 @@ async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
 
             if temp_file.exists():
                 await adjust_audio_speed(temp_file, output_file, target_duration_ms)
-                os.remove(temp_file)
+                os.remove(temp_file)  # Remover arquivo temporário
             pbar.update(1)
 
     final_audio = await merge_audio_files(output_dir, srt_file)
 
+    # Apagar a pasta temporária se srt_temp_deleta for True
     if srt_temp_deleta:
         shutil.rmtree(output_dir, ignore_errors=True)
         print(f"Pasta temporária {output_dir} apagada.")
@@ -267,6 +280,25 @@ def controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volum
     audio_file = asyncio.run(process_srt_file(srt_file, actual_voice, output_dir, pitch_input, volume_input))
     return audio_file
 
+# Funções para a aba "Arquivos gerados"
+def listar_audios():
+    try:
+        srt_output_dir = "output/srt_output"
+        if not os.path.exists(srt_output_dir):
+            os.makedirs(srt_output_dir, exist_ok=True)
+            return ["Nenhum áudio gerado ainda"]
+        arquivos = [f for f in os.listdir(srt_output_dir) if f.endswith(('.mp3', '.wav'))]
+        return arquivos if arquivos else ["Nenhum áudio gerado ainda"]
+    except Exception as e:
+        print(f"Erro ao listar áudios: {e}")
+        return ["Erro ao listar arquivos"]
+
+def tocar_audio(arquivo):
+    # Retorna o caminho do arquivo para reprodução, se válido
+    if arquivo and arquivo != "Nenhum áudio gerado ainda":
+        return f"output/srt_output/{arquivo}"
+    return None
+
 with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"), title="QuickTTS") as iface:
     gr.Markdown(badges)
     gr.Markdown(description)
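Both helpers are plain synchronous functions and can be exercised outside Gradio; the file name below is hypothetical and the actual output depends on what has already been generated:

listar_audios()                           # e.g. ["meu_audio_final.mp3"] or ["Nenhum áudio gerado ainda"]
tocar_audio("meu_audio_final.mp3")        # "output/srt_output/meu_audio_final.mp3"
tocar_audio("Nenhum áudio gerado ainda")  # None (nothing to play)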
@@ -372,45 +404,88 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
         gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
 
     with gr.TabItem("Ler .SRT"):
-        gr.Markdown("Carregar um arquivo SRT e
-        with gr.
+        gr.Markdown("Carregar um arquivo SRT e gerenciar áudios sincronizados com os tempos das legendas.")
+        with gr.Tabs():
+            with gr.TabItem("Gerar áudio"):
+                gr.Markdown("A velocidade é ajustada automaticamente para cada legenda.")
+                with gr.Row():
+                    language_input_srt = gr.Dropdown(
+                        choices=available_languages,
+                        label="Idioma",
+                        value=available_languages[52] if available_languages else None
+                    )
+                    initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
+                    voice_model_input_srt = gr.Dropdown(
+                        choices=initial_voices,
+                        label="Modelo de Voz",
+                        value=initial_voices[0] if initial_voices else None
+                    )
+
+                language_input_srt.change(
+                    fn=update_voice_options,
+                    inputs=[language_input_srt],
+                    outputs=[voice_model_input_srt]
+                )
+
+                srt_input = gr.File(label="Arquivo SRT", file_types=[".srt"], type="filepath")
+
+                with gr.Row():
+                    with gr.Column():
+                        pitch_input_srt = gr.Slider(minimum=-100, maximum=100, label="Tom (Hz)", value=0, interactive=True)
+                    with gr.Column():
+                        volume_input_srt = gr.Slider(minimum=-99, maximum=200, label="Volume (%)", value=0, interactive=True)
+
+                audio_output_srt = gr.Audio(label="Resultado", type="filepath", interactive=False)
+                with gr.Row():
+                    srt_button = gr.Button(value="Gerar Áudio")
+                    clear_button_srt = gr.ClearButton(srt_input, value='Limpar')
+
+                # Função para gerar áudio e atualizar a lista
+                def generate_and_update_list(srt_file, voice_model_input, pitch_input, volume_input):
+                    audio_file = controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volume_input)
+                    updated_list = listar_audios()
+                    return audio_file, updated_list
+
+                srt_button.click(
+                    fn=generate_and_update_list,
+                    inputs=[srt_input, voice_model_input_srt, pitch_input_srt, volume_input_srt],
+                    outputs=[audio_output_srt, gr.Dropdown(visible=False)],  # Componente oculto para a lista
+                    queue=True  # Garantir que o evento seja processado na fila
+                )
+
+                gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
+
+            with gr.TabItem("Arquivos gerados"):
+                gr.Markdown("Lista de arquivos de áudio gerados na pasta 'output/srt_output'.")
+                audio_list = gr.Dropdown(
+                    label="Arquivos de áudio",
+                    choices=listar_audios(),
+                    value=None,
+                    interactive=True,
+                    allow_custom_value=True
+                )
+                play_button = gr.Button(value="Tocar")
+                audio_player = gr.Audio(label="Reproduzir", type="filepath", interactive=False)
+                status_message = gr.Textbox(label="Status", interactive=False, visible=True)
+
+                def update_audio_list():
+                    arquivos = listar_audios()
+                    return gr.update(choices=arquivos, value=None), "Lista atualizada com sucesso" if "Erro" not in arquivos[0] else "Erro ao atualizar lista"
+
+                refresh_button = gr.Button(value="Atualizar Lista")
+                refresh_button.click(
+                    fn=update_audio_list,
+                    inputs=[],
+                    outputs=[audio_list, status_message],
+                    queue=True
+                )
+
+                play_button.click(
+                    fn=tocar_audio,
+                    inputs=[audio_list],
+                    outputs=[audio_player],
+                    queue=True
+                )
 
     gr.Markdown("""
     Desenvolvido por Rafael Godoy <br>
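In the "Arquivos gerados" tab it is the refresh handler that actually repopulates the dropdown, by returning a gr.update(...) for audio_list together with a status string. A stripped-down sketch of that wiring, assuming the same listar_audios helper (component and label names shortened for illustration):

import gradio as gr

def refresh():
    files = listar_audios()
    ok = files and "Erro" not in files[0]
    return gr.update(choices=files, value=None), ("Lista atualizada" if ok else "Erro ao atualizar lista")

with gr.Blocks() as demo:
    dd = gr.Dropdown(label="Arquivos de áudio", choices=listar_audios())
    status = gr.Textbox(label="Status", interactive=False)
    gr.Button("Atualizar Lista").click(fn=refresh, inputs=[], outputs=[dd, status])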