Spaces:

LAP-DEV
/

Demo

Running

App Files Community

LAP-DEV committed on Dec 18, 2024

Commit

2f73f46

verified ·

1 Parent(s): d8e191b

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -25

app.py CHANGED Viewed

@@ -114,13 +114,25 @@ class App:
             cb_diarize = gr.Checkbox(value=diarization_params["is_diarize"],label="Use diarization",interactive=True)
             tb_hf_token = gr.Text(label="Token", value=diarization_params["hf_token"],info="An access token is required to use diarization & can be created [here](https://hf.co/settings/tokens). If not done yet for your account, you need to accept the terms & conditions of [diarization](https://huggingface.co/pyannote/speaker-diarization-3.1) & [segmentation](https://huggingface.co/pyannote/segmentation-3.0)")
-        with gr.Accordion("Advanced audio options", open=False, visible=True):
-            cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
-                                            interactive=True,
-                                            info="Enable to remove background music before transcribing")
             cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
-                            interactive=True,
-                            info="Enable to transcribe only detected voice parts")
         with gr.Accordion("Advanced options", open=False, visible=False):
             with gr.Accordion("Advanced diarization options", open=False, visible=True):
@@ -200,9 +212,9 @@ class App:
                     nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
             with gr.Accordion("Background Music Remover Filter", open=False):
-#                cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
-#                                                interactive=True,
-#                                                info="Enabling this will remove background music by submodel before transcribing.")
                 dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
                                             choices=self.whisper_inf.music_separator.available_devices)
                 dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
@@ -212,25 +224,25 @@ class App:
                 cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
                                                     value=uvr_params["enable_offload"])
-            with gr.Accordion("Voice Detection Filter", open=False):
 #                cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
 #                                            interactive=True,
 #                                            info="Enable this to transcribe only detected voice parts by submodel.")
-                sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
-                                         value=vad_params["threshold"],
-                                         info="Lower it to be more sensitive to small sounds.")
-                nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
-                                                      value=vad_params["min_speech_duration_ms"],
-                                                      info="Final speech chunks shorter than this time are thrown out")
-                nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
-                                                     value=vad_params["max_speech_duration_s"],
-                                                     info="Maximum duration of speech chunks in \"seconds\".")
-                nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
-                                                       value=vad_params["min_silence_duration_ms"],
-                                                       info="In the end of each speech chunk wait for this time"
-                                                            " before separating it")
-                nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
-                                             info="Final speech chunks are padded by this time each side")
         #dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])

             cb_diarize = gr.Checkbox(value=diarization_params["is_diarize"],label="Use diarization",interactive=True)
             tb_hf_token = gr.Text(label="Token", value=diarization_params["hf_token"],info="An access token is required to use diarization & can be created [here](https://hf.co/settings/tokens). If not done yet for your account, you need to accept the terms & conditions of [diarization](https://huggingface.co/pyannote/speaker-diarization-3.1) & [segmentation](https://huggingface.co/pyannote/segmentation-3.0)")
+        with gr.Accordion("Voice Detection Filter", open=False, visible=True):
             cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
+                                        interactive=True,
+                                        info="Enable to transcribe only detected voice parts")
+            sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
+                                     value=vad_params["threshold"],
+                                     info="Lower it to be more sensitive to small sounds")
+            nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
+                                                  value=vad_params["min_speech_duration_ms"],
+                                                  info="Final speech chunks shorter than this time are thrown out")
+            nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
+                                                 value=vad_params["max_speech_duration_s"],
+                                                 info="Maximum duration of speech chunks in \"seconds\"")
+            nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
+                                                   value=vad_params["min_silence_duration_ms"],
+                                                   info="In the end of each speech chunk wait for this time"
+                                                        " before separating it")
+            nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
+                                         info="Final speech chunks are padded by this time each side")
         with gr.Accordion("Advanced options", open=False, visible=False):
             with gr.Accordion("Advanced diarization options", open=False, visible=True):
                     nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
             with gr.Accordion("Background Music Remover Filter", open=False):
+                cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
+                                                interactive=True,
+                                                info="Enabling this will remove background music by submodel before transcribing.")
                 dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
                                             choices=self.whisper_inf.music_separator.available_devices)
                 dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
                 cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
                                                     value=uvr_params["enable_offload"])
+#            with gr.Accordion("Voice Detection Filter", open=False):
 #                cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
 #                                            interactive=True,
 #                                            info="Enable this to transcribe only detected voice parts by submodel.")
+#                sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
+#                                         value=vad_params["threshold"],
+#                                         info="Lower it to be more sensitive to small sounds.")
+#                nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
+#                                                      value=vad_params["min_speech_duration_ms"],
+#                                                      info="Final speech chunks shorter than this time are thrown out")
+#                nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
+#                                                     value=vad_params["max_speech_duration_s"],
+#                                                     info="Maximum duration of speech chunks in \"seconds\".")
+#                nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
+#                                                       value=vad_params["min_silence_duration_ms"],
+#                                                       info="In the end of each speech chunk wait for this time"
+#                                                            " before separating it")
+#                nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
+#                                             info="Final speech chunks are padded by this time each side")
         #dd_model.change(fn=self.on_change_models, inputs=[dd_model], outputs=[cb_translate])