Update app.py
Browse files
app.py
CHANGED
@@ -114,13 +114,25 @@ class App:
|
|
114 |
cb_diarize = gr.Checkbox(value=diarization_params["is_diarize"],label="Use diarization",interactive=True)
|
115 |
tb_hf_token = gr.Text(label="Token", value=diarization_params["hf_token"],info="An access token is required to use diarization & can be created [here](https://hf.co/settings/tokens). If not done yet for your account, you need to accept the terms & conditions of [diarization](https://huggingface.co/pyannote/speaker-diarization-3.1) & [segmentation](https://huggingface.co/pyannote/segmentation-3.0)")
|
116 |
|
117 |
-
with gr.Accordion("
|
118 |
-
cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
|
119 |
-
interactive=True,
|
120 |
-
info="Enable to remove background music before transcribing")
|
121 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
with gr.Accordion("Advanced options", open=False, visible=False):
|
126 |
with gr.Accordion("Advanced diarization options", open=False, visible=True):
|
@@ -200,9 +212,9 @@ class App:
|
|
200 |
nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
|
201 |
|
202 |
with gr.Accordion("Background Music Remover Filter", open=False):
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
|
207 |
choices=self.whisper_inf.music_separator.available_devices)
|
208 |
dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
|
@@ -212,25 +224,25 @@ class App:
|
|
212 |
cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
|
213 |
value=uvr_params["enable_offload"])
|
214 |
|
215 |
-
with gr.Accordion("Voice Detection Filter", open=False):
|
216 |
# cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
217 |
# interactive=True,
|
218 |
# info="Enable this to transcribe only detected voice parts by submodel.")
|
219 |
-
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
220 |
-
value=vad_params["threshold"],
|
221 |
-
info="Lower it to be more sensitive to small sounds.")
|
222 |
-
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
|
223 |
-
value=vad_params["min_speech_duration_ms"],
|
224 |
-
info="Final speech chunks shorter than this time are thrown out")
|
225 |
-
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
|
226 |
-
value=vad_params["max_speech_duration_s"],
|
227 |
-
info="Maximum duration of speech chunks in \"seconds\".")
|
228 |
-
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
|
229 |
-
value=vad_params["min_silence_duration_ms"],
|
230 |
-
info="In the end of each speech chunk wait for this time"
|
231 |
-
" before separating it")
|
232 |
-
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
|
233 |
-
info="Final speech chunks are padded by this time each side")
|
234 |
|
235 |
#dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
|
236 |
|
|
|
114 |
cb_diarize = gr.Checkbox(value=diarization_params["is_diarize"],label="Use diarization",interactive=True)
|
115 |
tb_hf_token = gr.Text(label="Token", value=diarization_params["hf_token"],info="An access token is required to use diarization & can be created [here](https://hf.co/settings/tokens). If not done yet for your account, you need to accept the terms & conditions of [diarization](https://huggingface.co/pyannote/speaker-diarization-3.1) & [segmentation](https://huggingface.co/pyannote/segmentation-3.0)")
|
116 |
|
117 |
+
with gr.Accordion("Voice Detection Filter", open=False, visible=True):
|
|
|
|
|
|
|
118 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
119 |
+
interactive=True,
|
120 |
+
info="Enable to transcribe only detected voice parts")
|
121 |
+
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
122 |
+
value=vad_params["threshold"],
|
123 |
+
info="Lower it to be more sensitive to small sounds")
|
124 |
+
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
|
125 |
+
value=vad_params["min_speech_duration_ms"],
|
126 |
+
info="Final speech chunks shorter than this time are thrown out")
|
127 |
+
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
|
128 |
+
value=vad_params["max_speech_duration_s"],
|
129 |
+
info="Maximum duration of speech chunks in \"seconds\"")
|
130 |
+
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
|
131 |
+
value=vad_params["min_silence_duration_ms"],
|
132 |
+
info="In the end of each speech chunk wait for this time"
|
133 |
+
" before separating it")
|
134 |
+
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
|
135 |
+
info="Final speech chunks are padded by this time each side")
|
136 |
|
137 |
with gr.Accordion("Advanced options", open=False, visible=False):
|
138 |
with gr.Accordion("Advanced diarization options", open=False, visible=True):
|
|
|
212 |
nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
|
213 |
|
214 |
with gr.Accordion("Background Music Remover Filter", open=False):
|
215 |
+
cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
|
216 |
+
interactive=True,
|
217 |
+
info="Enabling this will remove background music by submodel before transcribing.")
|
218 |
dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
|
219 |
choices=self.whisper_inf.music_separator.available_devices)
|
220 |
dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
|
|
|
224 |
cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
|
225 |
value=uvr_params["enable_offload"])
|
226 |
|
227 |
+
# with gr.Accordion("Voice Detection Filter", open=False):
|
228 |
# cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
229 |
# interactive=True,
|
230 |
# info="Enable this to transcribe only detected voice parts by submodel.")
|
231 |
+
# sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
232 |
+
# value=vad_params["threshold"],
|
233 |
+
# info="Lower it to be more sensitive to small sounds.")
|
234 |
+
# nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
|
235 |
+
# value=vad_params["min_speech_duration_ms"],
|
236 |
+
# info="Final speech chunks shorter than this time are thrown out")
|
237 |
+
# nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
|
238 |
+
# value=vad_params["max_speech_duration_s"],
|
239 |
+
# info="Maximum duration of speech chunks in \"seconds\".")
|
240 |
+
# nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
|
241 |
+
# value=vad_params["min_silence_duration_ms"],
|
242 |
+
# info="In the end of each speech chunk wait for this time"
|
243 |
+
# " before separating it")
|
244 |
+
# nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
|
245 |
+
# info="Final speech chunks are padded by this time each side")
|
246 |
|
247 |
#dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])
|
248 |
|