RafaG committed on
Commit
293a37a
·
verified ·
1 Parent(s): 69966fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -45
app.py CHANGED
@@ -13,17 +13,21 @@ import pysrt
13
  from tqdm import tqdm
14
  import shutil
15
 
16
- srt_temp_deleta = True
 
17
 
 
18
  def load_voices():
19
  with open('voices.json', 'r', encoding='utf-8') as f:
20
  return json.load(f)
21
 
 
22
  def get_voice_options(language, voices_data):
23
  if language in voices_data:
24
  return [f"{voice['name']} | {voice['gender']}" for voice in voices_data[language]]
25
  return []
26
 
 
27
  def extract_voice_name(formatted_voice):
28
  return formatted_voice.split(" | ")[0]
29
 
@@ -142,6 +146,7 @@ def controlador_generate_audio_from_file(file, voice_model_input, speed_input, p
142
 
143
  return audio_file
144
 
 
145
  def timetoms(time_obj):
146
  return time_obj.hours * 3600000 + time_obj.minutes * 60000 + time_obj.seconds * 1000 + time_obj.milliseconds
147
 
@@ -162,7 +167,7 @@ async def merge_audio_files(output_folder, srt_file):
162
 
163
  if audio_file.exists():
164
  audio = AudioSegment.from_mp3(str(audio_file))
165
- audio_segment = audio
166
  else:
167
  print(f"\nArquivo de áudio não encontrado: {audio_file}")
168
  audio_segment = AudioSegment.silent(duration=end_time - start_time)
@@ -183,7 +188,9 @@ async def merge_audio_files(output_folder, srt_file):
183
 
184
  final_audio += AudioSegment.silent(duration=additional_silence_duration)
185
 
186
- output_file = audio_dir.parent / f"{base_name}_final.mp3"
 
 
187
  final_audio.export(str(output_file), format="mp3")
188
  print(f"\nÁudio final salvo em: {output_file}\n")
189
  return str(output_file)
@@ -196,10 +203,13 @@ async def adjust_audio_speed(input_file, output_file, target_duration_ms):
196
  print(f"Erro: Áudio em {input_file} tem duração zero.")
197
  return audio
198
 
199
- speed_audios = original_duration_ms / target_duration_ms
 
200
 
201
- adjusted_audio = audio.speedup(playback_speed=speed_audios) if speed_audios > 1 else audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * speed_audios)})
 
202
 
 
203
  if len(adjusted_audio) > target_duration_ms:
204
  adjusted_audio = adjusted_audio[:target_duration_ms]
205
  elif len(adjusted_audio) < target_duration_ms:
@@ -230,12 +240,14 @@ async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
230
  target_duration_ms = timetoms(sub.end) - timetoms(sub.start)
231
 
232
  if not output_file.exists() or output_file.stat().st_size == 0:
 
233
  tts = EdgeTTS(text=sub.text, voice=voice, pitch=pitch_str, volume=volume_str)
234
  tasks.append(tts.save(str(temp_file)))
235
 
236
  if tasks:
237
  await asyncio.gather(*tasks)
238
 
 
239
  for i in batch:
240
  sub = subs[i]
241
  temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
@@ -244,11 +256,12 @@ async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
244
 
245
  if temp_file.exists():
246
  await adjust_audio_speed(temp_file, output_file, target_duration_ms)
247
- os.remove(temp_file)
248
  pbar.update(1)
249
 
250
  final_audio = await merge_audio_files(output_dir, srt_file)
251
 
 
252
  if srt_temp_deleta:
253
  shutil.rmtree(output_dir, ignore_errors=True)
254
  print(f"Pasta temporária {output_dir} apagada.")
@@ -267,6 +280,25 @@ def controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volum
267
  audio_file = asyncio.run(process_srt_file(srt_file, actual_voice, output_dir, pitch_input, volume_input))
268
  return audio_file
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"), title="QuickTTS") as iface:
271
  gr.Markdown(badges)
272
  gr.Markdown(description)
@@ -372,45 +404,88 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
372
  gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
373
 
374
  with gr.TabItem("Ler .SRT"):
375
- gr.Markdown("Carregar um arquivo SRT e gerar áudio sincronizado com os tempos das legendas. A velocidade é ajustada automaticamente para cada legenda.")
376
- with gr.Row():
377
- language_input_srt = gr.Dropdown(
378
- choices=available_languages,
379
- label="Idioma",
380
- value=available_languages[52] if available_languages else None
381
- )
382
- initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
383
- voice_model_input_srt = gr.Dropdown(
384
- choices=initial_voices,
385
- label="Modelo de Voz",
386
- value=initial_voices[0] if initial_voices else None
387
- )
388
-
389
- language_input_srt.change(
390
- fn=update_voice_options,
391
- inputs=[language_input_srt],
392
- outputs=[voice_model_input_srt]
393
- )
394
-
395
- srt_input = gr.File(label="Arquivo SRT", file_types=[".srt"], type="filepath")
396
-
397
- with gr.Row():
398
- with gr.Column():
399
- pitch_input_srt = gr.Slider(minimum=-100, maximum=100, label="Tom (Hz)", value=0, interactive=True)
400
- with gr.Column():
401
- volume_input_srt = gr.Slider(minimum=-99, maximum=100, label="Volume (%)", value=0, interactive=True)
402
-
403
- audio_output_srt = gr.Audio(label="Resultado", type="filepath", interactive=False)
404
- with gr.Row():
405
- srt_button = gr.Button(value="Gerar Áudio")
406
- srt_button.click(
407
- controlador_process_srt_file,
408
- inputs=[srt_input, voice_model_input_srt, pitch_input_srt, volume_input_srt],
409
- outputs=[audio_output_srt]
410
- )
411
- clear_button_srt = gr.ClearButton(srt_input, value='Limpar')
412
-
413
- gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
 
415
  gr.Markdown("""
416
  Desenvolvido por Rafael Godoy <br>
 
13
  from tqdm import tqdm
14
  import shutil
15
 
16
+ # Controla se a pasta temporária srt_temp será apagada após gerar o áudio
17
+ srt_temp_deleta = True # True apaga, False mantém
18
 
19
# Load the voice catalogue from the JSON file
def load_voices():
    """Read ``voices.json`` from the current working directory and return its parsed content.

    The file is decoded as UTF-8. Errors from opening the file or from
    parsing the JSON propagate to the caller.
    """
    voices_path = 'voices.json'
    with open(voices_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
23
 
24
# Build the display labels for the voices of one language
def get_voice_options(language, voices_data):
    """Return ``"<name> | <gender>"`` labels for every voice listed under *language*.

    *voices_data* maps a language key to a list of dicts that each carry
    ``'name'`` and ``'gender'`` entries. An unknown language yields an
    empty list.
    """
    if language not in voices_data:
        return []

    labels = []
    for entry in voices_data[language]:
        labels.append(f"{entry['name']} | {entry['gender']}")
    return labels
29
 
30
# Recover the bare voice name from a display label
def extract_voice_name(formatted_voice):
    """Return the text that precedes the first ``" | "`` separator.

    A label without the separator is returned unchanged.
    """
    voice_name, _separator, _rest = formatted_voice.partition(" | ")
    return voice_name
33
 
 
146
 
147
  return audio_file
148
 
149
# Functions adapted from TTS.py to process SRT files with speed adjustment
def timetoms(time_obj):
    """Convert a time object into a total number of milliseconds.

    *time_obj* must expose ``hours``, ``minutes``, ``seconds`` and
    ``milliseconds`` attributes.
    """
    weighted_parts = (
        (time_obj.hours, 3600000),
        (time_obj.minutes, 60000),
        (time_obj.seconds, 1000),
    )
    return sum(value * factor for value, factor in weighted_parts) + time_obj.milliseconds
152
 
 
167
 
168
  if audio_file.exists():
169
  audio = AudioSegment.from_mp3(str(audio_file))
170
+ audio_segment = audio # Já ajustado anteriormente
171
  else:
172
  print(f"\nArquivo de áudio não encontrado: {audio_file}")
173
  audio_segment = AudioSegment.silent(duration=end_time - start_time)
 
188
 
189
  final_audio += AudioSegment.silent(duration=additional_silence_duration)
190
 
191
+ srt_output_dir = Path("output/srt_output")
192
+ srt_output_dir.mkdir(parents=True, exist_ok=True)
193
+ output_file = srt_output_dir / f"{base_name}_final.mp3"
194
  final_audio.export(str(output_file), format="mp3")
195
  print(f"\nÁudio final salvo em: {output_file}\n")
196
  return str(output_file)
 
203
  print(f"Erro: Áudio em {input_file} tem duração zero.")
204
  return audio
205
 
206
+ speed_factor = original_duration_ms / target_duration_ms
207
+ print(f"Fator de velocidade calculado: {speed_factor:.2f}x (original: {original_duration_ms}ms, alvo: {target_duration_ms}ms)")
208
 
209
+ # Ajustar a velocidade usando pydub
210
+ adjusted_audio = audio.speedup(playback_speed=speed_factor) if speed_factor > 1 else audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * speed_factor)})
211
 
212
+ # Garantir que a duração ajustada seja próxima do alvo
213
  if len(adjusted_audio) > target_duration_ms:
214
  adjusted_audio = adjusted_audio[:target_duration_ms]
215
  elif len(adjusted_audio) < target_duration_ms:
 
240
  target_duration_ms = timetoms(sub.end) - timetoms(sub.start)
241
 
242
  if not output_file.exists() or output_file.stat().st_size == 0:
243
+ # Gerar áudio inicial sem ajuste de velocidade
244
  tts = EdgeTTS(text=sub.text, voice=voice, pitch=pitch_str, volume=volume_str)
245
  tasks.append(tts.save(str(temp_file)))
246
 
247
  if tasks:
248
  await asyncio.gather(*tasks)
249
 
250
+ # Ajustar velocidade para cada áudio gerado no batch
251
  for i in batch:
252
  sub = subs[i]
253
  temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
 
256
 
257
  if temp_file.exists():
258
  await adjust_audio_speed(temp_file, output_file, target_duration_ms)
259
+ os.remove(temp_file) # Remover arquivo temporário
260
  pbar.update(1)
261
 
262
  final_audio = await merge_audio_files(output_dir, srt_file)
263
 
264
+ # Apagar a pasta temporária se srt_temp_deleta for True
265
  if srt_temp_deleta:
266
  shutil.rmtree(output_dir, ignore_errors=True)
267
  print(f"Pasta temporária {output_dir} apagada.")
 
280
  audio_file = asyncio.run(process_srt_file(srt_file, actual_voice, output_dir, pitch_input, volume_input))
281
  return audio_file
282
 
283
# Helpers for the "Arquivos gerados" tab
def listar_audios():
    """List the audio files stored in ``output/srt_output``.

    The folder is created when it does not exist yet. Only regular files
    whose name ends in ``.mp3`` or ``.wav`` (compared case-insensitively)
    are reported, sorted by name so the result is stable between calls.

    Returns:
        A list of file names (without the folder). When there is nothing to
        show the list holds the single placeholder
        ``"Nenhum áudio gerado ainda"``; when the folder cannot be created
        or read it holds the single placeholder ``"Erro ao listar arquivos"``.
    """
    srt_output_dir = "output/srt_output"
    try:
        os.makedirs(srt_output_dir, exist_ok=True)
        # os.listdir returns entries in arbitrary order, hence the sort.
        arquivos = sorted(
            nome
            for nome in os.listdir(srt_output_dir)
            if nome.lower().endswith(('.mp3', '.wav'))
            and os.path.isfile(os.path.join(srt_output_dir, nome))
        )
    except OSError as e:
        # Only filesystem failures are turned into the error placeholder;
        # anything else is a programming error and should surface.
        print(f"Erro ao listar áudios: {e}")
        return ["Erro ao listar arquivos"]
    return arquivos or ["Nenhum áudio gerado ainda"]
295
+
296
def tocar_audio(arquivo):
    """Map a selected file name to its path inside ``output/srt_output``.

    Args:
        arquivo: The value chosen (or typed) in the file list; may be
            ``None`` or empty when nothing is selected.

    Returns:
        ``"output/srt_output/<arquivo>"`` for a plain file name, or ``None``
        when nothing is selected, when the value is one of the list's
        placeholder messages, or when the value is not a bare file name.
    """
    placeholders = ("Nenhum áudio gerado ainda", "Erro ao listar arquivos")
    if not arquivo or arquivo in placeholders:
        return None

    # The value can be free text, so refuse anything that could point
    # outside the output folder (path separators or dot entries).
    if "/" in arquivo or "\\" in arquivo or arquivo in (".", ".."):
        return None

    return f"output/srt_output/{arquivo}"
301
+
302
  with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"), title="QuickTTS") as iface:
303
  gr.Markdown(badges)
304
  gr.Markdown(description)
 
404
  gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
405
 
406
  with gr.TabItem("Ler .SRT"):
407
+ gr.Markdown("Carregar um arquivo SRT e gerenciar áudios sincronizados com os tempos das legendas.")
408
+ with gr.Tabs():
409
+ with gr.TabItem("Gerar áudio"):
410
+ gr.Markdown("A velocidade é ajustada automaticamente para cada legenda.")
411
+ with gr.Row():
412
+ language_input_srt = gr.Dropdown(
413
+ choices=available_languages,
414
+ label="Idioma",
415
+ value=available_languages[52] if available_languages else None
416
+ )
417
+ initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
418
+ voice_model_input_srt = gr.Dropdown(
419
+ choices=initial_voices,
420
+ label="Modelo de Voz",
421
+ value=initial_voices[0] if initial_voices else None
422
+ )
423
+
424
+ language_input_srt.change(
425
+ fn=update_voice_options,
426
+ inputs=[language_input_srt],
427
+ outputs=[voice_model_input_srt]
428
+ )
429
+
430
+ srt_input = gr.File(label="Arquivo SRT", file_types=[".srt"], type="filepath")
431
+
432
+ with gr.Row():
433
+ with gr.Column():
434
+ pitch_input_srt = gr.Slider(minimum=-100, maximum=100, label="Tom (Hz)", value=0, interactive=True)
435
+ with gr.Column():
436
+ volume_input_srt = gr.Slider(minimum=-99, maximum=200, label="Volume (%)", value=0, interactive=True)
437
+
438
+ audio_output_srt = gr.Audio(label="Resultado", type="filepath", interactive=False)
439
+ with gr.Row():
440
+ srt_button = gr.Button(value="Gerar Áudio")
441
+ clear_button_srt = gr.ClearButton(srt_input, value='Limpar')
442
+
443
+ # Função para gerar áudio e atualizar a lista
444
+ def generate_and_update_list(srt_file, voice_model_input, pitch_input, volume_input):
445
+ audio_file = controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volume_input)
446
+ updated_list = listar_audios()
447
+ return audio_file, updated_list
448
+
449
+ srt_button.click(
450
+ fn=generate_and_update_list,
451
+ inputs=[srt_input, voice_model_input_srt, pitch_input_srt, volume_input_srt],
452
+ outputs=[audio_output_srt, gr.Dropdown(visible=False)], # Componente oculto para a lista
453
+ queue=True # Garantir que o evento seja processado na fila
454
+ )
455
+
456
+ gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
457
+
458
+ with gr.TabItem("Arquivos gerados"):
459
+ gr.Markdown("Lista de arquivos de áudio gerados na pasta 'output/srt_output'.")
460
+ audio_list = gr.Dropdown(
461
+ label="Arquivos de áudio",
462
+ choices=listar_audios(),
463
+ value=None,
464
+ interactive=True,
465
+ allow_custom_value=True
466
+ )
467
+ play_button = gr.Button(value="Tocar")
468
+ audio_player = gr.Audio(label="Reproduzir", type="filepath", interactive=False)
469
+ status_message = gr.Textbox(label="Status", interactive=False, visible=True)
470
+
471
+ def update_audio_list():
472
+ arquivos = listar_audios()
473
+ return gr.update(choices=arquivos, value=None), "Lista atualizada com sucesso" if "Erro" not in arquivos[0] else "Erro ao atualizar lista"
474
+
475
+ refresh_button = gr.Button(value="Atualizar Lista")
476
+ refresh_button.click(
477
+ fn=update_audio_list,
478
+ inputs=[],
479
+ outputs=[audio_list, status_message],
480
+ queue=True
481
+ )
482
+
483
+ play_button.click(
484
+ fn=tocar_audio,
485
+ inputs=[audio_list],
486
+ outputs=[audio_player],
487
+ queue=True
488
+ )
489
 
490
  gr.Markdown("""
491
  Desenvolvido por Rafael Godoy <br>