Politrees committed on
Commit
4f80d1a
·
verified ·
1 Parent(s): a3713cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -33
app.py CHANGED
@@ -240,8 +240,8 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
240
 
241
  def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
242
  """Separate audio using MDX23C model."""
243
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
244
- print_message(audio, model)
245
  model = MDX23C_MODELS[model_key]
246
  try:
247
  out_dir = prepare_output_dir(audio, out_dir)
@@ -276,8 +276,8 @@ def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pit
276
 
277
  def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
278
  """Separate audio using MDX-NET model."""
279
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
280
- print_message(audio, model)
281
  model = MDXNET_MODELS[model_key]
282
  try:
283
  out_dir = prepare_output_dir(audio, out_dir)
@@ -312,8 +312,8 @@ def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, mode
312
 
313
  def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
314
  """Separate audio using VR ARCH model."""
315
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
316
- print_message(audio, model)
317
  model = VR_ARCH_MODELS[model_key]
318
  try:
319
  out_dir = prepare_output_dir(audio, out_dir)
@@ -350,8 +350,8 @@ def vr_separator(audio, model_key, window_size, aggression, tta, post_process, p
350
 
351
  def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
352
  """Separate audio using Demucs model."""
353
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
354
- print_message(audio, model)
355
  model = DEMUCS_MODELS[model_key]
356
  try:
357
  out_dir = prepare_output_dir(audio, out_dir)
@@ -394,6 +394,10 @@ def update_stems(model):
394
  else:
395
  return gr.update(visible=False)
396
 
 
 
 
 
397
  with gr.Blocks(
398
  title="🎵 Audio-Separator by Politrees 🎵",
399
  css="footer{display:none !important}",
@@ -413,10 +417,8 @@ with gr.Blocks(
413
  with gr.Column(variant='panel'):
414
  with gr.Group():
415
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
416
- roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
417
- with gr.Column(variant='panel'):
418
- with gr.Group():
419
  with gr.Row():
 
420
  roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
421
  roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
422
  with gr.Column(variant='panel'):
@@ -439,16 +441,14 @@ with gr.Blocks(
439
  mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
440
  mdx23c_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
441
  with gr.Accordion("Advanced settings", open=False):
442
- with gr.Column(variant='compact'):
443
  with gr.Group():
444
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
445
- mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
446
- with gr.Column(variant='compact'):
447
- with gr.Group():
448
  with gr.Row():
 
449
  mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
450
  mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
451
- with gr.Column(variant='compact'):
452
  with gr.Group():
453
  with gr.Row():
454
  mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
@@ -468,15 +468,14 @@ with gr.Blocks(
468
  mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
469
  mdx_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
470
  with gr.Accordion("Advanced settings", open=False):
471
- with gr.Column():
472
  with gr.Group():
 
473
  with gr.Row():
474
  mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
475
  mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
476
- with gr.Group():
477
- with gr.Row():
478
  mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
479
- mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
480
  with gr.Group():
481
  with gr.Row():
482
  mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
@@ -496,19 +495,17 @@ with gr.Blocks(
496
  vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
497
  vr_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
498
  with gr.Accordion("Advanced settings", open=False):
499
- with gr.Column():
500
  with gr.Group():
501
  with gr.Row():
502
- vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
503
- vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Aggression", info="Intensity of primary stem extraction.")
504
- with gr.Group():
505
- with gr.Column():
506
  vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
507
- vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
508
- with gr.Group():
509
- with gr.Row():
510
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
511
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
 
 
 
 
 
512
  with gr.Group():
513
  with gr.Row():
514
  vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
@@ -528,15 +525,14 @@ with gr.Blocks(
528
  demucs_model = gr.Dropdown(value="htdemucs_6s", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
529
  demucs_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
530
  with gr.Accordion("Advanced settings", open=False):
531
- with gr.Column():
532
  with gr.Group():
 
533
  with gr.Row():
534
  demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
535
- demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
536
- with gr.Group():
537
- with gr.Row():
538
  demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
539
- demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
 
540
  with gr.Group():
541
  with gr.Row():
542
  demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
@@ -605,6 +601,10 @@ with gr.Blocks(
605
  """
606
  )
607
 
 
 
 
 
608
  demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
609
 
610
  list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)
 
240
 
241
  def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
242
  """Separate audio using MDX23C model."""
243
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
244
+ print_message(audio, model_key)
245
  model = MDX23C_MODELS[model_key]
246
  try:
247
  out_dir = prepare_output_dir(audio, out_dir)
 
276
 
277
  def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
278
  """Separate audio using MDX-NET model."""
279
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
280
+ print_message(audio, model_key)
281
  model = MDXNET_MODELS[model_key]
282
  try:
283
  out_dir = prepare_output_dir(audio, out_dir)
 
312
 
313
  def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
314
  """Separate audio using VR ARCH model."""
315
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
316
+ print_message(audio, model_key)
317
  model = VR_ARCH_MODELS[model_key]
318
  try:
319
  out_dir = prepare_output_dir(audio, out_dir)
 
350
 
351
  def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
352
  """Separate audio using Demucs model."""
353
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
354
+ print_message(audio, model_key)
355
  model = DEMUCS_MODELS[model_key]
356
  try:
357
  out_dir = prepare_output_dir(audio, out_dir)
 
394
  else:
395
  return gr.update(visible=False)
396
 
397
+ def show_hide_params(param):
398
+ """Update the visibility of a parameter based on the checkbox state."""
399
+ return gr.update(visible=param)
400
+
401
  with gr.Blocks(
402
  title="🎵 Audio-Separator by Politrees 🎵",
403
  css="footer{display:none !important}",
 
417
  with gr.Column(variant='panel'):
418
  with gr.Group():
419
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
 
 
 
420
  with gr.Row():
421
+ roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
422
  roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
423
  roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
424
  with gr.Column(variant='panel'):
 
441
  mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
442
  mdx23c_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
443
  with gr.Accordion("Advanced settings", open=False):
444
+ with gr.Column(variant='panel'):
445
  with gr.Group():
446
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
 
 
 
447
  with gr.Row():
448
+ mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
449
  mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
450
  mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
451
+ with gr.Column(variant='panel'):
452
  with gr.Group():
453
  with gr.Row():
454
  mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
 
468
  mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
469
  mdx_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
470
  with gr.Accordion("Advanced settings", open=False):
471
+ with gr.Column(variant='panel'):
472
  with gr.Group():
473
+ mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
474
  with gr.Row():
475
  mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
476
  mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
 
 
477
  mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
478
+ with gr.Column(variant='panel'):
479
  with gr.Group():
480
  with gr.Row():
481
  mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
 
495
  vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
496
  vr_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
497
  with gr.Accordion("Advanced settings", open=False):
498
+ with gr.Column(variant='panel'):
499
  with gr.Group():
500
  with gr.Row():
 
 
 
 
501
  vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
 
 
 
502
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
503
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
504
+ with gr.Row():
505
+ vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
506
+ vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
507
+ vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Aggression", info="Intensity of primary stem extraction.")
508
+ with gr.Column(variant='panel'):
509
  with gr.Group():
510
  with gr.Row():
511
  vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
 
525
  demucs_model = gr.Dropdown(value="htdemucs_6s", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
526
  demucs_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
527
  with gr.Accordion("Advanced settings", open=False):
528
+ with gr.Column(variant='panel'):
529
  with gr.Group():
530
+ demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
531
  with gr.Row():
532
  demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
 
 
 
533
  demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
534
+ demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
535
+ with gr.Column(variant='panel'):
536
  with gr.Group():
537
  with gr.Row():
538
  demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
 
601
  """
602
  )
603
 
604
+ roformer_override_seg_size.change(show_hide_params, inputs=[roformer_override_seg_size], outputs=[roformer_seg_size])
605
+ mdx23c_override_seg_size.change(show_hide_params, inputs=[mdx23c_override_seg_size], outputs=[mdx23c_seg_size])
606
+ vr_post_process.change(show_hide_params, inputs=[vr_post_process], outputs=[vr_post_process_threshold])
607
+
608
  demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
609
 
610
  list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)