Politrees committed on
Commit
4f80d1a
·
verified ·
1 Parent(s): a3713cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -33
app.py CHANGED
@@ -240,8 +240,8 @@ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, p
240
 
241
  def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
242
  """Separate audio using MDX23C model."""
243
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
244
- print_message(audio, model)
245
  model = MDX23C_MODELS[model_key]
246
  try:
247
  out_dir = prepare_output_dir(audio, out_dir)
@@ -276,8 +276,8 @@ def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pit
276
 
277
  def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
278
  """Separate audio using MDX-NET model."""
279
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
280
- print_message(audio, model)
281
  model = MDXNET_MODELS[model_key]
282
  try:
283
  out_dir = prepare_output_dir(audio, out_dir)
@@ -312,8 +312,8 @@ def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, mode
312
 
313
  def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
314
  """Separate audio using VR ARCH model."""
315
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
316
- print_message(audio, model)
317
  model = VR_ARCH_MODELS[model_key]
318
  try:
319
  out_dir = prepare_output_dir(audio, out_dir)
@@ -350,8 +350,8 @@ def vr_separator(audio, model_key, window_size, aggression, tta, post_process, p
350
 
351
  def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
352
  """Separate audio using Demucs model."""
353
- stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model)
354
- print_message(audio, model)
355
  model = DEMUCS_MODELS[model_key]
356
  try:
357
  out_dir = prepare_output_dir(audio, out_dir)
@@ -394,6 +394,10 @@ def update_stems(model):
394
  else:
395
  return gr.update(visible=False)
396
 
 
 
 
 
397
  with gr.Blocks(
398
  title="🎵 Audio-Separator by Politrees 🎵",
399
  css="footer{display:none !important}",
@@ -413,10 +417,8 @@ with gr.Blocks(
413
  with gr.Column(variant='panel'):
414
  with gr.Group():
415
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
416
- roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
417
- with gr.Column(variant='panel'):
418
- with gr.Group():
419
  with gr.Row():
 
420
  roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
421
  roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
422
  with gr.Column(variant='panel'):
@@ -439,16 +441,14 @@ with gr.Blocks(
439
  mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
440
  mdx23c_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
441
  with gr.Accordion("Advanced settings", open=False):
442
- with gr.Column(variant='compact'):
443
  with gr.Group():
444
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
445
- mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
446
- with gr.Column(variant='compact'):
447
- with gr.Group():
448
  with gr.Row():
 
449
  mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
450
  mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
451
- with gr.Column(variant='compact'):
452
  with gr.Group():
453
  with gr.Row():
454
  mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
@@ -468,15 +468,14 @@ with gr.Blocks(
468
  mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
469
  mdx_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
470
  with gr.Accordion("Advanced settings", open=False):
471
- with gr.Column():
472
  with gr.Group():
 
473
  with gr.Row():
474
  mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
475
  mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
476
- with gr.Group():
477
- with gr.Row():
478
  mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
479
- mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
480
  with gr.Group():
481
  with gr.Row():
482
  mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
@@ -496,19 +495,17 @@ with gr.Blocks(
496
  vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
497
  vr_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
498
  with gr.Accordion("Advanced settings", open=False):
499
- with gr.Column():
500
  with gr.Group():
501
  with gr.Row():
502
- vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
503
- vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Aggression", info="Intensity of primary stem extraction.")
504
- with gr.Group():
505
- with gr.Column():
506
  vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
507
- vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
508
- with gr.Group():
509
- with gr.Row():
510
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
511
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
 
 
 
 
 
512
  with gr.Group():
513
  with gr.Row():
514
  vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
@@ -528,15 +525,14 @@ with gr.Blocks(
528
  demucs_model = gr.Dropdown(value="htdemucs_6s", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
529
  demucs_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
530
  with gr.Accordion("Advanced settings", open=False):
531
- with gr.Column():
532
  with gr.Group():
 
533
  with gr.Row():
534
  demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
535
- demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
536
- with gr.Group():
537
- with gr.Row():
538
  demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
539
- demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
 
540
  with gr.Group():
541
  with gr.Row():
542
  demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
@@ -605,6 +601,10 @@ with gr.Blocks(
605
  """
606
  )
607
 
 
 
 
 
608
  demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
609
 
610
  list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)
 
240
 
241
  def mdx23c_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
242
  """Separate audio using MDX23C model."""
243
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
244
+ print_message(audio, model_key)
245
  model = MDX23C_MODELS[model_key]
246
  try:
247
  out_dir = prepare_output_dir(audio, out_dir)
 
276
 
277
  def mdx_separator(audio, model_key, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
278
  """Separate audio using MDX-NET model."""
279
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
280
+ print_message(audio, model_key)
281
  model = MDXNET_MODELS[model_key]
282
  try:
283
  out_dir = prepare_output_dir(audio, out_dir)
 
312
 
313
  def vr_separator(audio, model_key, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
314
  """Separate audio using VR ARCH model."""
315
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
316
+ print_message(audio, model_key)
317
  model = VR_ARCH_MODELS[model_key]
318
  try:
319
  out_dir = prepare_output_dir(audio, out_dir)
 
350
 
351
  def demucs_separator(audio, model_key, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
352
  """Separate audio using Demucs model."""
353
+ stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, model_key)
354
+ print_message(audio, model_key)
355
  model = DEMUCS_MODELS[model_key]
356
  try:
357
  out_dir = prepare_output_dir(audio, out_dir)
 
394
  else:
395
  return gr.update(visible=False)
396
 
397
+ def show_hide_params(param):
398
+ """Update the visibility of a parameter based on the checkbox state."""
399
+ return gr.update(visible=param)
400
+
401
  with gr.Blocks(
402
  title="🎵 Audio-Separator by Politrees 🎵",
403
  css="footer{display:none !important}",
 
417
  with gr.Column(variant='panel'):
418
  with gr.Group():
419
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
 
 
 
420
  with gr.Row():
421
+ roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
422
  roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
423
  roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
424
  with gr.Column(variant='panel'):
 
441
  mdx23c_model = gr.Dropdown(value="MDX23C-InstVoc HQ", label="Select the Model", choices=list(MDX23C_MODELS.keys()), scale=3)
442
  mdx23c_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
443
  with gr.Accordion("Advanced settings", open=False):
444
+ with gr.Column(variant='panel'):
445
  with gr.Group():
446
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
 
 
 
447
  with gr.Row():
448
+ mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
449
  mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
450
  mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals.")
451
+ with gr.Column(variant='panel'):
452
  with gr.Group():
453
  with gr.Row():
454
  mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
 
468
  mdx_model = gr.Dropdown(value="UVR-MDX-NET Inst HQ 5", label="Select the Model", choices=list(MDXNET_MODELS.keys()), scale=3)
469
  mdx_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
470
  with gr.Accordion("Advanced settings", open=False):
471
+ with gr.Column(variant='panel'):
472
  with gr.Group():
473
+ mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
474
  with gr.Row():
475
  mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
476
  mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
 
 
477
  mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
478
+ with gr.Column(variant='panel'):
479
  with gr.Group():
480
  with gr.Row():
481
  mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
 
495
  vr_model = gr.Dropdown(value="1_HP-UVR", label="Select the Model", choices=list(VR_ARCH_MODELS.keys()), scale=3)
496
  vr_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
497
  with gr.Accordion("Advanced settings", open=False):
498
+ with gr.Column(variant='panel'):
499
  with gr.Group():
500
  with gr.Row():
 
 
 
 
501
  vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
 
 
 
502
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
503
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
504
+ with gr.Row():
505
+ vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
506
+ vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
507
+ vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Aggression", info="Intensity of primary stem extraction.")
508
+ with gr.Column(variant='panel'):
509
  with gr.Group():
510
  with gr.Row():
511
  vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
 
525
  demucs_model = gr.Dropdown(value="htdemucs_6s", label="Select the Model", choices=list(DEMUCS_MODELS.keys()), scale=3)
526
  demucs_output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.", scale=1)
527
  with gr.Accordion("Advanced settings", open=False):
528
+ with gr.Column(variant='panel'):
529
  with gr.Group():
530
+ demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
531
  with gr.Row():
532
  demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
 
 
 
533
  demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
534
+ demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
535
+ with gr.Column(variant='panel'):
536
  with gr.Group():
537
  with gr.Row():
538
  demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
 
601
  """
602
  )
603
 
604
+ roformer_override_seg_size.change(show_hide_params, inputs=[roformer_override_seg_size], outputs=[roformer_seg_size])
605
+ mdx23c_override_seg_size.change(show_hide_params, inputs=[mdx23c_override_seg_size], outputs=[mdx23c_seg_size])
606
+ vr_post_process.change(show_hide_params, inputs=[vr_post_process], outputs=[vr_post_process_threshold])
607
+
608
  demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)
609
 
610
  list_button.click(leaderboard, inputs=[list_filter, list_limit], outputs=output_list)