Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -240,8 +240,8 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
|
|
240 |
|
241 |
def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
242 |
"""Separate audio using MDX23C model."""
|
243 |
-
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem,
|
244 |
-
print_message(audio,
|
245 |
model = MDX23C_MODELS[model_key]
|
246 |
try:
|
247 |
out_dir = prepare_output_dir(audio, out_dir)
|
@@ -276,8 +276,8 @@ def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pit
|
|
276 |
|
277 |
def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
278 |
"""Separate audio using MDX-NET model."""
|
279 |
-
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem,
|
280 |
-
print_message(audio,
|
281 |
model = MDXNET_MODELS[model_key]
|
282 |
try:
|
283 |
out_dir = prepare_output_dir(audio, out_dir)
|
@@ -312,8 +312,8 @@ def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, mode
|
|
312 |
|
313 |
def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
314 |
"""Separate audio using VR ARCH model."""
|
315 |
-
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem,
|
316 |
-
print_message(audio,
|
317 |
model = VR_ARCH_MODELS[model_key]
|
318 |
try:
|
319 |
out_dir = prepare_output_dir(audio, out_dir)
|
@@ -350,8 +350,8 @@ def vr_separator(audio, model_key, window_size, aggression, tta, post_process, p
|
|
350 |
|
351 |
def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
352 |
"""Separate audio using Demucs model."""
|
353 |
-
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem,
|
354 |
-
print_message(audio,
|
355 |
model = DEMUCS_MODELS[model_key]
|
356 |
try:
|
357 |
out_dir = prepare_output_dir(audio, out_dir)
|
@@ -394,6 +394,10 @@ def update_stems(model):
|
|
394 |
else:
|
395 |
return gr.update(visible=False)
|
396 |
|
|
|
|
|
|
|
|
|
397 |
with gr.Blocks(
|
398 |
title="🎵 Audio-Separator by Politrees 🎵",
|
399 |
css="footer{display:none !important}",
|
@@ -413,10 +417,8 @@ with gr.Blocks(
|
|
413 |
with gr.Column(variant='panel'):
|
414 |
with gr.Group():
|
415 |
roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
|
416 |
-
roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
|
417 |
-
with gr.Column(variant='panel'):
|
418 |
-
with gr.Group():
|
419 |
with gr.Row():
|
|
|
420 |
roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
|
421 |
roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
|
422 |
with gr.Column(variant='panel'):
|
@@ -439,16 +441,14 @@ with gr.Blocks(
|
|
439 |
mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
|
440 |
mdx23c_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
441 |
with gr.Accordion("Advanced settings", open=False):
|
442 |
-
with gr.Column(variant='
|
443 |
with gr.Group():
|
444 |
mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
|
445 |
-
mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
|
446 |
-
with gr.Column(variant='compact'):
|
447 |
-
with gr.Group():
|
448 |
with gr.Row():
|
|
|
449 |
mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
|
450 |
mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
|
451 |
-
with gr.Column(variant='
|
452 |
with gr.Group():
|
453 |
with gr.Row():
|
454 |
mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
|
@@ -468,15 +468,14 @@ with gr.Blocks(
|
|
468 |
mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
|
469 |
mdx_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
470 |
with gr.Accordion("Advanced settings", open=False):
|
471 |
-
with gr.Column():
|
472 |
with gr.Group():
|
|
|
473 |
with gr.Row():
|
474 |
mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
|
475 |
mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
|
476 |
-
with gr.Group():
|
477 |
-
with gr.Row():
|
478 |
mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
|
479 |
-
|
480 |
with gr.Group():
|
481 |
with gr.Row():
|
482 |
mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
|
@@ -496,19 +495,17 @@ with gr.Blocks(
|
|
496 |
vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
|
497 |
vr_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
498 |
with gr.Accordion("Advanced settings", open=False):
|
499 |
-
with gr.Column():
|
500 |
with gr.Group():
|
501 |
with gr.Row():
|
502 |
-
vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
|
503 |
-
vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
|
504 |
-
with gr.Group():
|
505 |
-
with gr.Column():
|
506 |
vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
|
507 |
-
vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
|
508 |
-
with gr.Group():
|
509 |
-
with gr.Row():
|
510 |
vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
|
511 |
vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
|
|
|
|
|
|
|
|
|
|
|
512 |
with gr.Group():
|
513 |
with gr.Row():
|
514 |
vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
|
@@ -528,15 +525,14 @@ with gr.Blocks(
|
|
528 |
demucs_model = gr.Dropdown(value="htdemucs_6s", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
|
529 |
demucs_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
530 |
with gr.Accordion("Advanced settings", open=False):
|
531 |
-
with gr.Column():
|
532 |
with gr.Group():
|
|
|
533 |
with gr.Row():
|
534 |
demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
|
535 |
-
demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
|
536 |
-
with gr.Group():
|
537 |
-
with gr.Row():
|
538 |
demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
|
539 |
-
|
|
|
540 |
with gr.Group():
|
541 |
with gr.Row():
|
542 |
demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
|
@@ -605,6 +601,10 @@ with gr.Blocks(
|
|
605 |
"""
|
606 |
)
|
607 |
|
|
|
|
|
|
|
|
|
608 |
demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
|
609 |
|
610 |
list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)
|
|
|
240 |
|
241 |
def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
242 |
"""Separate audio using MDX23C model."""
|
243 |
+
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
|
244 |
+
print_message(audio, model_key)
|
245 |
model = MDX23C_MODELS[model_key]
|
246 |
try:
|
247 |
out_dir = prepare_output_dir(audio, out_dir)
|
|
|
276 |
|
277 |
def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
278 |
"""Separate audio using MDX-NET model."""
|
279 |
+
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
|
280 |
+
print_message(audio, model_key)
|
281 |
model = MDXNET_MODELS[model_key]
|
282 |
try:
|
283 |
out_dir = prepare_output_dir(audio, out_dir)
|
|
|
312 |
|
313 |
def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
314 |
"""Separate audio using VR ARCH model."""
|
315 |
+
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
|
316 |
+
print_message(audio, model_key)
|
317 |
model = VR_ARCH_MODELS[model_key]
|
318 |
try:
|
319 |
out_dir = prepare_output_dir(audio, out_dir)
|
|
|
350 |
|
351 |
def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
|
352 |
"""Separate audio using Demucs model."""
|
353 |
+
stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
|
354 |
+
print_message(audio, model_key)
|
355 |
model = DEMUCS_MODELS[model_key]
|
356 |
try:
|
357 |
out_dir = prepare_output_dir(audio, out_dir)
|
|
|
394 |
else:
|
395 |
return gr.update(visible=False)
|
396 |
|
397 |
+
def show_hide_params(param):
    """Build a Gradio update that shows or hides a component.

    Used as a `.change` handler: the value passed in (the state of the
    controlling checkbox) is forwarded unchanged as the `visible` flag
    of the component listed in the event's outputs.
    """
    visibility_update = gr.update(visible=param)
    return visibility_update
|
400 |
+
|
401 |
with gr.Blocks(
|
402 |
title="🎵 Audio-Separator by Politrees 🎵",
|
403 |
css="footer{display:none !important}",
|
|
|
417 |
with gr.Column(variant='panel'):
|
418 |
with gr.Group():
|
419 |
roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
|
|
|
|
|
|
|
420 |
with gr.Row():
|
421 |
+
roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
|
422 |
roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
|
423 |
roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
|
424 |
with gr.Column(variant='panel'):
|
|
|
441 |
mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
|
442 |
mdx23c_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
443 |
with gr.Accordion("Advanced settings", open=False):
|
444 |
+
with gr.Column(variant='panel'):
|
445 |
with gr.Group():
|
446 |
mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
|
|
|
|
|
|
|
447 |
with gr.Row():
|
448 |
+
mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
|
449 |
mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
|
450 |
mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
|
451 |
+
with gr.Column(variant='panel'):
|
452 |
with gr.Group():
|
453 |
with gr.Row():
|
454 |
mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
|
|
|
468 |
mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
|
469 |
mdx_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
470 |
with gr.Accordion("Advanced settings", open=False):
|
471 |
+
with gr.Column(variant='panel'):
|
472 |
with gr.Group():
|
473 |
+
mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
|
474 |
with gr.Row():
|
475 |
mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
|
476 |
mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
|
|
|
|
|
477 |
mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
|
478 |
+
with gr.Column(variant='panel'):
|
479 |
with gr.Group():
|
480 |
with gr.Row():
|
481 |
mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
|
|
|
495 |
vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
|
496 |
vr_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
497 |
with gr.Accordion("Advanced settings", open=False):
|
498 |
+
with gr.Column(variant='panel'):
|
499 |
with gr.Group():
|
500 |
with gr.Row():
|
|
|
|
|
|
|
|
|
501 |
vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
|
|
|
|
|
|
|
502 |
vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
|
503 |
vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
|
504 |
+
with gr.Row():
|
505 |
+
vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
|
506 |
+
vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
|
507 |
+
vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
|
508 |
+
with gr.Column(variant='panel'):
|
509 |
with gr.Group():
|
510 |
with gr.Row():
|
511 |
vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
|
|
|
525 |
demucs_model = gr.Dropdown(value="htdemucs_6s", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
|
526 |
demucs_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
|
527 |
with gr.Accordion("Advanced settings", open=False):
|
528 |
+
with gr.Column(variant='panel'):
|
529 |
with gr.Group():
|
530 |
+
demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
|
531 |
with gr.Row():
|
532 |
demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
|
|
|
|
|
|
|
533 |
demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
|
534 |
+
demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
|
535 |
+
with gr.Column(variant='panel'):
|
536 |
with gr.Group():
|
537 |
with gr.Row():
|
538 |
demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
|
|
|
601 |
"""
|
602 |
)
|
603 |
|
604 |
+
roformer_override_seg_size.change(show_hide_params, inputs=[roformer_override_seg_size], outputs=[roformer_seg_size])
|
605 |
+
mdx23c_override_seg_size.change(show_hide_params, inputs=[mdx23c_override_seg_size], outputs=[mdx23c_seg_size])
|
606 |
+
vr_post_process.change(show_hide_params, inputs=[vr_post_process], outputs=[vr_post_process_threshold])
|
607 |
+
|
608 |
demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
|
609 |
|
610 |
list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)
|