Update app.py
Browse files
app.py
CHANGED
@@ -57,14 +57,14 @@ def load_f5tts():
|
|
57 |
ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token)
|
58 |
|
59 |
F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
60 |
-
return load_model(DiT, F5TTS_model_cfg, ckpt_path)
|
61 |
|
62 |
# Carregar modelo F5TTS
|
63 |
F5TTS_ema_model = load_f5tts()
|
64 |
|
65 |
@gpu_decorator
|
66 |
def infer(
|
67 |
-
ref_audio_orig, ref_text, gen_text, remove_silence, cross_fade_duration=0.15, speed=1, show_info=gr.Info
|
68 |
):
|
69 |
ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
|
70 |
ema_model = F5TTS_ema_model
|
@@ -75,6 +75,7 @@ def infer(
|
|
75 |
ema_model,
|
76 |
vocoder,
|
77 |
cross_fade_duration=cross_fade_duration,
|
|
|
78 |
speed=speed,
|
79 |
show_info=show_info,
|
80 |
progress=gr.Progress(),
|
@@ -140,6 +141,14 @@ with gr.Blocks(css=custom_css) as app:
|
|
140 |
step=0.1,
|
141 |
info="Ajuste a velocidade do áudio.",
|
142 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
cross_fade_duration_slider = gr.Slider(
|
144 |
label="Duração do Cross-fade (s)",
|
145 |
minimum=0.0,
|
@@ -170,6 +179,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
170 |
remove_silence,
|
171 |
cross_fade_duration_slider,
|
172 |
speed_slider,
|
|
|
173 |
chunk_size_slider,
|
174 |
):
|
175 |
# Dividir o texto em sentenças
|
@@ -191,6 +201,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
191 |
remove_silence,
|
192 |
cross_fade_duration_slider,
|
193 |
speed_slider,
|
|
|
194 |
)
|
195 |
sr, audio_data = audio_out
|
196 |
audio_segments.append(audio_data)
|
|
|
57 |
ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token)
|
58 |
|
59 |
F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
60 |
+
return load_model(DiT, F5TTS_model_cfg, ckpt_path, use_ema=True)
|
61 |
|
62 |
# Carregar modelo F5TTS
|
63 |
F5TTS_ema_model = load_f5tts()
|
64 |
|
65 |
@gpu_decorator
|
66 |
def infer(
|
67 |
+
ref_audio_orig, ref_text, gen_text, remove_silence, cross_fade_duration=0.15, speed=1, nfe=32, show_info=gr.Info
|
68 |
):
|
69 |
ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
|
70 |
ema_model = F5TTS_ema_model
|
|
|
75 |
ema_model,
|
76 |
vocoder,
|
77 |
cross_fade_duration=cross_fade_duration,
|
78 |
+
nfe_step=nfe,
|
79 |
speed=speed,
|
80 |
show_info=show_info,
|
81 |
progress=gr.Progress(),
|
|
|
141 |
step=0.1,
|
142 |
info="Ajuste a velocidade do áudio.",
|
143 |
)
|
144 |
+
nfe_slider = gr.Slider(
|
145 |
+
label="NFE Step",
|
146 |
+
minimum=16,
|
147 |
+
maximum=64,
|
148 |
+
value=32,
|
149 |
+
step=1,
|
150 |
+
info="Ajuste o número de passos NFE usados na geração do áudio.",
|
151 |
+
)
|
152 |
cross_fade_duration_slider = gr.Slider(
|
153 |
label="Duração do Cross-fade (s)",
|
154 |
minimum=0.0,
|
|
|
179 |
remove_silence,
|
180 |
cross_fade_duration_slider,
|
181 |
speed_slider,
|
182 |
+
nfe_slider,
|
183 |
chunk_size_slider,
|
184 |
):
|
185 |
# Dividir o texto em sentenças
|
|
|
201 |
remove_silence,
|
202 |
cross_fade_duration_slider,
|
203 |
speed_slider,
|
204 |
+
nfe_slider,
|
205 |
)
|
206 |
sr, audio_data = audio_out
|
207 |
audio_segments.append(audio_data)
|