LAP-DEV committed on
Commit
2f73f46
·
verified ·
1 Parent(s): d8e191b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -25
app.py CHANGED
@@ -114,13 +114,25 @@ class App:
114
  cb_diarize = gr.Checkbox(value=diarization_params["is_diarize"],label="Use diarization",interactive=True)
115
  tb_hf_token = gr.Text(label="Token", value=diarization_params["hf_token"],info="An access token is required to use diarization & can be created [here](https://hf.co/settings/tokens). If not done yet for your account, you need to accept the terms & conditions of [diarization](https://huggingface.co/pyannote/speaker-diarization-3.1) & [segmentation](https://huggingface.co/pyannote/segmentation-3.0)")
116
 
117
- with gr.Accordion("Advanced audio options", open=False, visible=True):
118
- cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
119
- interactive=True,
120
- info="Enable to remove background music before transcribing")
121
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
122
- interactive=True,
123
- info="Enable to transcribe only detected voice parts")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  with gr.Accordion("Advanced options", open=False, visible=False):
126
  with gr.Accordion("Advanced diarization options", open=False, visible=True):
@@ -200,9 +212,9 @@ class App:
200
  nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
201
 
202
  with gr.Accordion("Background Music Remover Filter", open=False):
203
- # cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
204
- # interactive=True,
205
- # info="Enabling this will remove background music by submodel before transcribing.")
206
  dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
207
  choices=self.whisper_inf.music_separator.available_devices)
208
  dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
@@ -212,25 +224,25 @@ class App:
212
  cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
213
  value=uvr_params["enable_offload"])
214
 
215
- with gr.Accordion("Voice Detection Filter", open=False):
216
  # cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
217
  # interactive=True,
218
  # info="Enable this to transcribe only detected voice parts by submodel.")
219
- sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
220
- value=vad_params["threshold"],
221
- info="Lower it to be more sensitive to small sounds.")
222
- nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
223
- value=vad_params["min_speech_duration_ms"],
224
- info="Final speech chunks shorter than this time are thrown out")
225
- nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
226
- value=vad_params["max_speech_duration_s"],
227
- info="Maximum duration of speech chunks in \"seconds\".")
228
- nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
229
- value=vad_params["min_silence_duration_ms"],
230
- info="In the end of each speech chunk wait for this time"
231
- " before separating it")
232
- nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
233
- info="Final speech chunks are padded by this time each side")
234
 
235
  #dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
236
 
 
114
  cb_diarize = gr.Checkbox(value=diarization_params["is_diarize"],label="Use diarization",interactive=True)
115
  tb_hf_token = gr.Text(label="Token", value=diarization_params["hf_token"],info="An access token is required to use diarization & can be created [here](https://hf.co/settings/tokens). If not done yet for your account, you need to accept the terms & conditions of [diarization](https://huggingface.co/pyannote/speaker-diarization-3.1) & [segmentation](https://huggingface.co/pyannote/segmentation-3.0)")
116
 
117
+ with gr.Accordion("Voice Detection Filter", open=False, visible=True):
 
 
 
118
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
119
+ interactive=True,
120
+ info="Enable to transcribe only detected voice parts")
121
+ sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
122
+ value=vad_params["threshold"],
123
+ info="Lower it to be more sensitive to small sounds")
124
+ nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
125
+ value=vad_params["min_speech_duration_ms"],
126
+ info="Final speech chunks shorter than this time are thrown out")
127
+ nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
128
+ value=vad_params["max_speech_duration_s"],
129
+ info="Maximum duration of speech chunks in \"seconds\"")
130
+ nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
131
+ value=vad_params["min_silence_duration_ms"],
132
+ info="In the end of each speech chunk wait for this time"
133
+ " before separating it")
134
+ nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
135
+ info="Final speech chunks are padded by this time each side")
136
 
137
  with gr.Accordion("Advanced options", open=False, visible=False):
138
  with gr.Accordion("Advanced diarization options", open=False, visible=True):
 
212
  nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
213
 
214
  with gr.Accordion("Background Music Remover Filter", open=False):
215
+ cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
216
+ interactive=True,
217
+ info="Enabling this will remove background music by submodel before transcribing.")
218
  dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
219
  choices=self.whisper_inf.music_separator.available_devices)
220
  dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
 
224
  cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
225
  value=uvr_params["enable_offload"])
226
 
227
+ # with gr.Accordion("Voice Detection Filter", open=False):
228
  # cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
229
  # interactive=True,
230
  # info="Enable this to transcribe only detected voice parts by submodel.")
231
+ # sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
232
+ # value=vad_params["threshold"],
233
+ # info="Lower it to be more sensitive to small sounds.")
234
+ # nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
235
+ # value=vad_params["min_speech_duration_ms"],
236
+ # info="Final speech chunks shorter than this time are thrown out")
237
+ # nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
238
+ # value=vad_params["max_speech_duration_s"],
239
+ # info="Maximum duration of speech chunks in \"seconds\".")
240
+ # nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
241
+ # value=vad_params["min_silence_duration_ms"],
242
+ # info="In the end of each speech chunk wait for this time"
243
+ # " before separating it")
244
+ # nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
245
+ # info="Final speech chunks are padded by this time each side")
246
 
247
  #dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
248