M4xjunior committed on
Commit
d4320d1
·
verified ·
1 Parent(s): 035bd50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -57,14 +57,14 @@ def load_f5tts():
57
  ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token)
58
 
59
  F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
60
- return load_model(DiT, F5TTS_model_cfg, ckpt_path)
61
 
62
  # Carregar modelo F5TTS
63
  F5TTS_ema_model = load_f5tts()
64
 
65
  @gpu_decorator
66
  def infer(
67
- ref_audio_orig, ref_text, gen_text, remove_silence, cross_fade_duration=0.15, speed=1, show_info=gr.Info
68
  ):
69
  ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
70
  ema_model = F5TTS_ema_model
@@ -75,6 +75,7 @@ def infer(
75
  ema_model,
76
  vocoder,
77
  cross_fade_duration=cross_fade_duration,
 
78
  speed=speed,
79
  show_info=show_info,
80
  progress=gr.Progress(),
@@ -140,6 +141,14 @@ with gr.Blocks(css=custom_css) as app:
140
  step=0.1,
141
  info="Ajuste a velocidade do áudio.",
142
  )
 
 
 
 
 
 
 
 
143
  cross_fade_duration_slider = gr.Slider(
144
  label="Duração do Cross-fade (s)",
145
  minimum=0.0,
@@ -170,6 +179,7 @@ with gr.Blocks(css=custom_css) as app:
170
  remove_silence,
171
  cross_fade_duration_slider,
172
  speed_slider,
 
173
  chunk_size_slider,
174
  ):
175
  # Dividir o texto em sentenças
@@ -191,6 +201,7 @@ with gr.Blocks(css=custom_css) as app:
191
  remove_silence,
192
  cross_fade_duration_slider,
193
  speed_slider,
 
194
  )
195
  sr, audio_data = audio_out
196
  audio_segments.append(audio_data)
 
57
  ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token)
58
 
59
  F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
60
+ return load_model(DiT, F5TTS_model_cfg, ckpt_path, use_ema=True)
61
 
62
  # Carregar modelo F5TTS
63
  F5TTS_ema_model = load_f5tts()
64
 
65
  @gpu_decorator
66
  def infer(
67
+ ref_audio_orig, ref_text, gen_text, remove_silence, cross_fade_duration=0.15, speed=1, nfe=32, show_info=gr.Info
68
  ):
69
  ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
70
  ema_model = F5TTS_ema_model
 
75
  ema_model,
76
  vocoder,
77
  cross_fade_duration=cross_fade_duration,
78
+ nfe_step=nfe,
79
  speed=speed,
80
  show_info=show_info,
81
  progress=gr.Progress(),
 
141
  step=0.1,
142
  info="Ajuste a velocidade do áudio.",
143
  )
144
+ nfe_slider = gr.Slider(
145
+ label="NFE Step",
146
+ minimum=16,
147
+ maximum=64,
148
+ value=32,
149
+ step=1,
150
+ info="Ajuste a velocidade do áudio.",
151
+ )
152
  cross_fade_duration_slider = gr.Slider(
153
  label="Duração do Cross-fade (s)",
154
  minimum=0.0,
 
179
  remove_silence,
180
  cross_fade_duration_slider,
181
  speed_slider,
182
+ nfe_slider,
183
  chunk_size_slider,
184
  ):
185
  # Dividir o texto em sentenças
 
201
  remove_silence,
202
  cross_fade_duration_slider,
203
  speed_slider,
204
+ nfe_slider,
205
  )
206
  sr, audio_data = audio_out
207
  audio_segments.append(audio_data)