Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,21 +13,17 @@ import pysrt
|
|
13 |
from tqdm import tqdm
|
14 |
import shutil
|
15 |
|
16 |
-
|
17 |
-
srt_temp_deleta = True # True apaga, False mantém
|
18 |
|
19 |
-
# Load voices from JSON file
|
20 |
def load_voices():
|
21 |
with open('voices.json', 'r', encoding='utf-8') as f:
|
22 |
return json.load(f)
|
23 |
|
24 |
-
# Get formatted voice options for specific language
|
25 |
def get_voice_options(language, voices_data):
|
26 |
if language in voices_data:
|
27 |
return [f"{voice['name']} | {voice['gender']}" for voice in voices_data[language]]
|
28 |
return []
|
29 |
|
30 |
-
# Extract voice name from formatted string
|
31 |
def extract_voice_name(formatted_voice):
|
32 |
return formatted_voice.split(" | ")[0]
|
33 |
|
@@ -58,12 +54,10 @@ def remove_silence(input_file, output_file):
|
|
58 |
|
59 |
def controlador_generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input, checkbox_cortar_silencio):
|
60 |
audio_file = generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input)
|
61 |
-
if audio_file:
|
62 |
-
print("Áudio gerado com sucesso:", audio_file)
|
63 |
if checkbox_cortar_silencio:
|
64 |
-
print("Cortando silêncio...")
|
65 |
remove_silence(audio_file, audio_file)
|
66 |
-
|
67 |
else:
|
68 |
print("Erro ao gerar áudio.")
|
69 |
return audio_file
|
@@ -146,7 +140,6 @@ def controlador_generate_audio_from_file(file, voice_model_input, speed_input, p
|
|
146 |
|
147 |
return audio_file
|
148 |
|
149 |
-
# Funções adaptadas do TTS.py para processar SRT com ajuste de velocidade
|
150 |
def timetoms(time_obj):
|
151 |
return time_obj.hours * 3600000 + time_obj.minutes * 60000 + time_obj.seconds * 1000 + time_obj.milliseconds
|
152 |
|
@@ -167,7 +160,7 @@ async def merge_audio_files(output_folder, srt_file):
|
|
167 |
|
168 |
if audio_file.exists():
|
169 |
audio = AudioSegment.from_mp3(str(audio_file))
|
170 |
-
audio_segment = audio
|
171 |
else:
|
172 |
print(f"\nArquivo de áudio não encontrado: {audio_file}")
|
173 |
audio_segment = AudioSegment.silent(duration=end_time - start_time)
|
@@ -203,13 +196,9 @@ async def adjust_audio_speed(input_file, output_file, target_duration_ms):
|
|
203 |
print(f"Erro: Áudio em {input_file} tem duração zero.")
|
204 |
return audio
|
205 |
|
206 |
-
speed_factor = original_duration_ms / target_duration_ms
|
207 |
-
print(f"Fator de velocidade calculado: {speed_factor:.2f}x (original: {original_duration_ms}ms, alvo: {target_duration_ms}ms)")
|
208 |
-
|
209 |
-
# Ajustar a velocidade usando pydub
|
210 |
adjusted_audio = audio.speedup(playback_speed=speed_factor) if speed_factor > 1 else audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * speed_factor)})
|
211 |
|
212 |
-
# Garantir que a duração ajustada seja próxima do alvo
|
213 |
if len(adjusted_audio) > target_duration_ms:
|
214 |
adjusted_audio = adjusted_audio[:target_duration_ms]
|
215 |
elif len(adjusted_audio) < target_duration_ms:
|
@@ -240,14 +229,12 @@ async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
|
|
240 |
target_duration_ms = timetoms(sub.end) - timetoms(sub.start)
|
241 |
|
242 |
if not output_file.exists() or output_file.stat().st_size == 0:
|
243 |
-
# Gerar áudio inicial sem ajuste de velocidade
|
244 |
tts = EdgeTTS(text=sub.text, voice=voice, pitch=pitch_str, volume=volume_str)
|
245 |
tasks.append(tts.save(str(temp_file)))
|
246 |
|
247 |
if tasks:
|
248 |
await asyncio.gather(*tasks)
|
249 |
|
250 |
-
# Ajustar velocidade para cada áudio gerado no batch
|
251 |
for i in batch:
|
252 |
sub = subs[i]
|
253 |
temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
|
@@ -256,12 +243,11 @@ async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
|
|
256 |
|
257 |
if temp_file.exists():
|
258 |
await adjust_audio_speed(temp_file, output_file, target_duration_ms)
|
259 |
-
os.remove(temp_file)
|
260 |
pbar.update(1)
|
261 |
|
262 |
final_audio = await merge_audio_files(output_dir, srt_file)
|
263 |
|
264 |
-
# Apagar a pasta temporária se srt_temp_deleta for True
|
265 |
if srt_temp_deleta:
|
266 |
shutil.rmtree(output_dir, ignore_errors=True)
|
267 |
print(f"Pasta temporária {output_dir} apagada.")
|
@@ -280,7 +266,6 @@ def controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volum
|
|
280 |
audio_file = asyncio.run(process_srt_file(srt_file, actual_voice, output_dir, pitch_input, volume_input))
|
281 |
return audio_file
|
282 |
|
283 |
-
# Funções para a aba "Arquivos gerados"
|
284 |
def listar_audios():
|
285 |
try:
|
286 |
srt_output_dir = "output/srt_output"
|
@@ -294,7 +279,6 @@ def listar_audios():
|
|
294 |
return ["Erro ao listar arquivos"]
|
295 |
|
296 |
def tocar_audio(arquivo):
|
297 |
-
# Retorna o caminho do arquivo para reprodução, se válido
|
298 |
if arquivo and arquivo != "Nenhum áudio gerado ainda":
|
299 |
return f"output/srt_output/{arquivo}"
|
300 |
return None
|
@@ -440,7 +424,6 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
|
|
440 |
srt_button = gr.Button(value="Gerar Áudio")
|
441 |
clear_button_srt = gr.ClearButton(srt_input, value='Limpar')
|
442 |
|
443 |
-
# Função para gerar áudio e atualizar a lista
|
444 |
def generate_and_update_list(srt_file, voice_model_input, pitch_input, volume_input):
|
445 |
audio_file = controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volume_input)
|
446 |
updated_list = listar_audios()
|
@@ -449,8 +432,8 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
|
|
449 |
srt_button.click(
|
450 |
fn=generate_and_update_list,
|
451 |
inputs=[srt_input, voice_model_input_srt, pitch_input_srt, volume_input_srt],
|
452 |
-
outputs=[audio_output_srt, gr.Dropdown(visible=False)],
|
453 |
-
queue=True
|
454 |
)
|
455 |
|
456 |
gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
|
|
|
13 |
from tqdm import tqdm
|
14 |
import shutil
|
15 |
|
16 |
+
srt_temp_deleta = True
|
|
|
17 |
|
|
|
18 |
def load_voices():
|
19 |
with open('voices.json', 'r', encoding='utf-8') as f:
|
20 |
return json.load(f)
|
21 |
|
|
|
22 |
def get_voice_options(language, voices_data):
|
23 |
if language in voices_data:
|
24 |
return [f"{voice['name']} | {voice['gender']}" for voice in voices_data[language]]
|
25 |
return []
|
26 |
|
|
|
27 |
def extract_voice_name(formatted_voice):
|
28 |
return formatted_voice.split(" | ")[0]
|
29 |
|
|
|
54 |
|
55 |
def controlador_generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input, checkbox_cortar_silencio):
|
56 |
audio_file = generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input)
|
57 |
+
if audio_file:
|
|
|
58 |
if checkbox_cortar_silencio:
|
|
|
59 |
remove_silence(audio_file, audio_file)
|
60 |
+
|
61 |
else:
|
62 |
print("Erro ao gerar áudio.")
|
63 |
return audio_file
|
|
|
140 |
|
141 |
return audio_file
|
142 |
|
|
|
143 |
def timetoms(time_obj):
|
144 |
return time_obj.hours * 3600000 + time_obj.minutes * 60000 + time_obj.seconds * 1000 + time_obj.milliseconds
|
145 |
|
|
|
160 |
|
161 |
if audio_file.exists():
|
162 |
audio = AudioSegment.from_mp3(str(audio_file))
|
163 |
+
audio_segment = audio
|
164 |
else:
|
165 |
print(f"\nArquivo de áudio não encontrado: {audio_file}")
|
166 |
audio_segment = AudioSegment.silent(duration=end_time - start_time)
|
|
|
196 |
print(f"Erro: Áudio em {input_file} tem duração zero.")
|
197 |
return audio
|
198 |
|
199 |
+
speed_factor = original_duration_ms / target_duration_ms
|
|
|
|
|
|
|
200 |
adjusted_audio = audio.speedup(playback_speed=speed_factor) if speed_factor > 1 else audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * speed_factor)})
|
201 |
|
|
|
202 |
if len(adjusted_audio) > target_duration_ms:
|
203 |
adjusted_audio = adjusted_audio[:target_duration_ms]
|
204 |
elif len(adjusted_audio) < target_duration_ms:
|
|
|
229 |
target_duration_ms = timetoms(sub.end) - timetoms(sub.start)
|
230 |
|
231 |
if not output_file.exists() or output_file.stat().st_size == 0:
|
|
|
232 |
tts = EdgeTTS(text=sub.text, voice=voice, pitch=pitch_str, volume=volume_str)
|
233 |
tasks.append(tts.save(str(temp_file)))
|
234 |
|
235 |
if tasks:
|
236 |
await asyncio.gather(*tasks)
|
237 |
|
|
|
238 |
for i in batch:
|
239 |
sub = subs[i]
|
240 |
temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
|
|
|
243 |
|
244 |
if temp_file.exists():
|
245 |
await adjust_audio_speed(temp_file, output_file, target_duration_ms)
|
246 |
+
os.remove(temp_file)
|
247 |
pbar.update(1)
|
248 |
|
249 |
final_audio = await merge_audio_files(output_dir, srt_file)
|
250 |
|
|
|
251 |
if srt_temp_deleta:
|
252 |
shutil.rmtree(output_dir, ignore_errors=True)
|
253 |
print(f"Pasta temporária {output_dir} apagada.")
|
|
|
266 |
audio_file = asyncio.run(process_srt_file(srt_file, actual_voice, output_dir, pitch_input, volume_input))
|
267 |
return audio_file
|
268 |
|
|
|
269 |
def listar_audios():
|
270 |
try:
|
271 |
srt_output_dir = "output/srt_output"
|
|
|
279 |
return ["Erro ao listar arquivos"]
|
280 |
|
281 |
def tocar_audio(arquivo):
|
|
|
282 |
if arquivo and arquivo != "Nenhum áudio gerado ainda":
|
283 |
return f"output/srt_output/{arquivo}"
|
284 |
return None
|
|
|
424 |
srt_button = gr.Button(value="Gerar Áudio")
|
425 |
clear_button_srt = gr.ClearButton(srt_input, value='Limpar')
|
426 |
|
|
|
427 |
def generate_and_update_list(srt_file, voice_model_input, pitch_input, volume_input):
|
428 |
audio_file = controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volume_input)
|
429 |
updated_list = listar_audios()
|
|
|
432 |
srt_button.click(
|
433 |
fn=generate_and_update_list,
|
434 |
inputs=[srt_input, voice_model_input_srt, pitch_input_srt, volume_input_srt],
|
435 |
+
outputs=[audio_output_srt, gr.Dropdown(visible=False)],
|
436 |
+
queue=True
|
437 |
)
|
438 |
|
439 |
gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
|