UVR-API

Running

App Files Files Community

Eddycrack864 committed on Jun 24, 2024

Commit

f5cfb96

verified ·

1 Parent(s): cbdf8e1

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -334

app.py CHANGED Viewed

@@ -1,14 +1,14 @@
-import os
-import glob
-from pathlib import Path
 import gradio as gr
-roformer_models = [
-    'BS-Roformer-Viperx-1297.ckpt',
-    'BS-Roformer-Viperx-1296.ckpt',
-    'BS-Roformer-Viperx-1053.ckpt',
-    'Mel-Roformer-Viperx-1143.ckpt',
-]
 mdx23c_models = [
     'MDX23C_D1581.ckpt',
@@ -88,336 +88,85 @@ vrarch_models = [
 ]
 demucs_models = [
-    'htdemucs_ft.yaml',
     'htdemucs.yaml',
     'hdemucs_mmi.yaml',
-    'htdemucs_6s.yaml',
 ]
-output_format = [
-    'wav',
-    'flac',
-    'mp3',
-]
-mdxnet_overlap_values = [
-    '0.25',
-    '0.5',
-    '0.75',
-    '0.99',
-]
-vrarch_window_size_values = [
-    '320',
-    '512',
-    '1024',
-]
-with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
     gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
-    with gr.Tabs():
-        with gr.TabItem("BS-Roformer"):
-            with gr.Row():
-                roformer_model = gr.Dropdown(
-                    label = "Select the Model",
-                    choices = roformer_models,
-                    interactive = True
-                )
-                roformer_output_format = gr.Dropdown(
-                    label = "Select the Output Format",
-                    choices = output_format,
-                    interactive = True
-                )
-            with gr.Row():
-                roformer_overlap = gr.Slider(
-                    minimum = 2,
-                    maximum = 4,
-                    step = 1,
-                    label = "Overlap",
-                    info = "Amount of overlap between prediction windows.",
-                    value = 4,
-                    interactive = True
-                )
-            with gr.Row():
-                roformer_audio = gr.Audio(
-                    label = "Input Audio",
-                    interactive = True
-                )
-            with gr.Row():
-                roformer_button = gr.Button("Separate", variant = "primary") #revisar esto
-                #resto del codigo de lo que hace el boton
-            with gr.Row():
-                roformer_stem_1 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 1"
-                )
-                roformer_stem_2 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 2"
-                )
-            with gr.Row():
-                roformer_status = gr.Textbox(
-                    label = "Output Information",
-                    interactive = False
-                    #añadir algo que muestre aqui todo
-                )
-        with gr.TabItem("MDX23C"):
-            with gr.Row():
-                mdx23c_model = gr.Dropdown(
-                    label = "Select the Model",
-                    choices = mdx23c_models,
-                    interactive = True
-                )
-                mdx23c_output_format = gr.Dropdown(
-                    label = "Select the Output Format",
-                    choices = output_format,
-                    interactive = True
-                )
-            with gr.Row():
-                mdx23c_segment_size = gr.Slider(
-                    minimum = 32,
-                    maximum = 4000,
-                    step = 32,
-                    label = "Segment Size",
-                    info = "Larger consumes more resources, but may give better results.",
-                    value = 256,
-                    interactive = True
-                )
-                mdx23c_overlap = gr.Slider(
-                    minimum = 2,
-                    maximum = 50,
-                    step = 1,
-                    label = "Overlap",
-                    info = "Amount of overlap between prediction windows.",
-                    value = 8,
-                    interactive = True
-                )
-            with gr.Row():
-                mdx23c_audio = gr.Audio(
-                    label = "Input Audio",
-                    interactive = True
-                )
-            with gr.Row():
-                mdx23c_button = gr.Button("Separate", variant = "primary") #revisar esto
-                #resto del codigo de lo que hace el boton
-            with gr.Row():
-                mdx23c_stem_1 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 1"
-                )
-                mdx23c_stem_2 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 2"
-                )
-            with gr.Row():
-                mdx23c_status = gr.Textbox(
-                    label = "Output Information",
-                    interactive = False
-                    #añadir algo que muestre aqui todo
-                )
-        with gr.TabItem("MDX-NET"):
-            with gr.Row():
-                mdxnet_model = gr.Dropdown(
-                    label = "Select the Model",
-                    choices = mdxnet_models,
-                    interactive = True
-                )
-                mdxnet_output_format = gr.Dropdown(
-                    label = "Select the Output Format",
-                    choices = output_format,
-                    interactive = True
-                )
-            with gr.Row():
-                mdxnet_segment_size = gr.Slider(
-                    minimum = 32,
-                    maximum = 4000,
-                    step = 32,
-                    label = "Segment Size",
-                    info = "Larger consumes more resources, but may give better results.",
-                    value = 256,
-                    interactive = True
-                )
-                mdxnet_overlap = gr.Dropdown(
-                        label = "Overlap",
-                        choices = mdxnet_overlap_values,
-                        value = mdxnet_overlap_values[0],
-                        interactive = True
-                )
-                mdxnet_denoise = gr.Checkbox(
-                    label = "Denoise",
-                    info = "Enable denoising during separation.",
-                    value = True,
-                    interactive = True
-                )
-            with gr.Row():
-                mdxnet_audio = gr.Audio(
-                    label = "Input Audio",
-                    interactive = True
-                )
-            with gr.Row():
-                mdxnet_button = gr.Button("Separate", variant = "primary") #revisar esto
-                #resto del codigo de lo que hace el boton
-            with gr.Row():
-                mdxnet_stem_1 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 1"
-                )
-                mdxnet_stem_2 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 2"
-                )
-            with gr.Row():
-                mdxnet_status = gr.Textbox(
-                    label = "Output Information",
-                    interactive = False
-                    #añadir algo que muestre aqui todo
-                )
-        with gr.TabItem("VR ARCH"):
-            with gr.Row():
-                vrarch_model = gr.Dropdown(
-                    label = "Select the Model",
-                    choices = vrarch_models,
-                    interactive = True
-                )
-                vrarch_output_format = gr.Dropdown(
-                    label = "Select the Output Format",
-                    choices = output_format,
-                    interactive = True
-                )
-            with gr.Row():
-                vrarch_window_size = gr.Dropdown(
-                    label = "Window Size",
-                    choices = vrarch_window_size_values,
-                    value = vrarch_window_size_values[0],
-                    interactive = True
-                )
-                vrarch_agression = gr.Slider(
-                    minimum = 1,
-                    maximum = 50,
-                    step = 1,
-                    label = "Agression",
-                    info = "Intensity of primary stem extraction.",
-                    value = 5,
-                    interactive = True
-                )
-                vrarch_tta = gr.Checkbox(
-                    label = "TTA",
-                    info = "Enable Test-Time-Augmentation; slow but improves quality.",
-                    value = True,
-                    interactive = True
-                )
-                vrarch_high_end_process = gr.Checkbox(
-                    label = "High End Process",
-                    info = "Mirror the missing frequency range of the output.",
-                    value = False,
-                    interactive = True
-                )
-            with gr.Row():
-                vrarch_audio = gr.Audio(
-                    label = "Input Audio",
-                    interactive = True
-                )
-            with gr.Row():
-                vrarch_button = gr.Button("Separate", variant = "primary") #revisar esto
-                #resto del codigo de lo que hace el boton
-            with gr.Row():
-                vrarch_stem_1 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 1"
-                )
-                vrarch_stem_2 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 2"
-                )
-            with gr.Row():
-                vrarch_status = gr.Textbox(
-                    label = "Output Information",
-                    interactive = False
-                    #añadir algo que muestre aqui todo
-                )
-        with gr.TabItem("Demucs"):
-            with gr.Row():
-                demucs_model = gr.Dropdown(
-                    label = "Select the Model",
-                    choices = demucs_models,
-                    interactive = True
-                )
-                demucs_output_format = gr.Dropdown(
-                    label = "Select the Output Format",
-                    choices = output_format,
-                    interactive = True
-                )
-            with gr.Row():
-                demucs_shifts = gr.Slider(
-                    minimum = 1,
-                    maximum = 20,
-                    step = 1,
-                    label = "Shifts",
-                    info = "Number of predictions with random shifts, higher = slower but better quality.",
-                    value = 2,
-                    interactive = True
-                )
-                demucs_overlap = gr.Slider(
-                    minimum = 0.001,
-                    maximum = 0.999,
-                    step = 0.001,
-                    label = "Overlap",
-                    info = "Amount of overlap between prediction windows.",
-                    value = 0.025,
-                    interactive = True
-                )
-            with gr.Row():
-                demucs_audio = gr.Audio(
-                    label = "Input Audio",
-                    interactive = True
-                )
-            with gr.Row():
-                demucs_button = gr.Button("Separate", variant = "primary") #revisar esto
-                #resto del codigo de lo que hace el boton
-            with gr.Row():
-                demucs_stem_1 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 1"
-                )
-                demucs_stem_2 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 2"
-                )
-                demucs_stem_3 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 3"
-                )
-                demucs_stem_4 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 4"
-                )
-            with gr.Row():
-                demucs_stem_5 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 5"
-                )
-                demucs_stem_6 = gr.Audio(
-                    show_download_button = True,
-                    interactive = False,
-                    label = "Stem 6"
-                )
-            with gr.Row():
-                demucs_status = gr.Textbox(
-                    label = "Output Information",
-                    interactive = False
-                    #añadir algo que muestre aqui todo
-                )
-app.launch()

 import gradio as gr
+from audio_separator.separator import Separator
+# Single Separator instance shared by every separation handler defined below.
+separator = Separator()
+# Maps the model label offered in the Roformer dropdown to the checkpoint
+# filename that roformer_separator passes to separator.load_model().
+roformer_models = {
+        'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
+        'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
+        'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
+        'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
+}
 mdx23c_models = [
     'MDX23C_D1581.ckpt',
 ]
 demucs_models = [
+    'htdemucs_ft.yaml',
     'htdemucs.yaml',
     'hdemucs_mmi.yaml',
 ]
+def roformer_separator(audio, checkpoint_name):
+    """Separate an audio file with a Roformer checkpoint and return two stems.
+
+    `audio` is the value of the Roformer tab's input Audio component, which
+    is declared with type="filepath". `checkpoint_name` is a key of
+    `roformer_models`; it is translated to the checkpoint filename before
+    the model is loaded into the shared `separator`.
+
+    Returns the first two entries of the result of `separator.separate`
+    (presumably output file paths, since they feed filepath Audio
+    components -- confirm against the audio_separator documentation).
+
+    Raises KeyError when `checkpoint_name` is not a key of `roformer_models`.
+    """
+    full_checkpoint_name = roformer_models[checkpoint_name]
+    separator.load_model(full_checkpoint_name)
+    output_files = separator.separate(audio)
+    stem1 = output_files[0]
+    stem2 = output_files[1]
+    return stem1, stem2
+def mdx_vr_separator(audio, checkpoint_name):
+    """Separate an audio file into two stems for the MDX23C, MDX-NET and VR-ARCH tabs.
+
+    Unlike `roformer_separator`, `checkpoint_name` is handed to
+    `separator.load_model` unchanged (no lookup table). `audio` is the value
+    of the calling tab's input Audio component (type="filepath").
+
+    Returns the first two entries of the result of `separator.separate`
+    (presumably output file paths -- confirm against the audio_separator
+    documentation).
+    """
+    separator.load_model(checkpoint_name)
+    output_files = separator.separate(audio)
+    stem1 = output_files[0]
+    stem2 = output_files[1]
+    return stem1, stem2
+def demucs_separator(audio, checkpoint_name):
+    """Separate an audio file with a Demucs model and return four stems.
+
+    `checkpoint_name` is passed to `separator.load_model` unchanged and
+    `audio` is the value of the Demucs tab's input Audio component
+    (type="filepath").
+
+    Returns the first four entries of the result of `separator.separate`.
+    NOTE(review): this indexes positions 0-3 unconditionally, so it assumes
+    every model offered in `demucs_models` yields at least four outputs --
+    confirm.
+    """
+    separator.load_model(checkpoint_name)
+    output_files = separator.separate(audio)
+    stem1 = output_files[0]
+    stem2 = output_files[1]
+    stem3 = output_files[2]
+    stem4 = output_files[3]
+    return stem1, stem2, stem3, stem4
+# Build the Gradio UI: one outer tab containing one sub-tab per model family.
+# Every sub-tab has the same layout: an input Audio (filepath), a model
+# dropdown next to a "Separate" button, and one output Audio per stem.
+with gr.Blocks(title="🎵 UVR5 UI 🎵") as demo:
     gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
+    with gr.Tab("Vocal Separator (UVR)"):
+        gr.Markdown("Separate vocals and instruments from an audio file using UVR models.")
+        with gr.Tab("Mel/BS Roformer"):
+            roformer_audio_file = gr.Audio(label="Audio File", type="filepath")
+            with gr.Row():
+                # The dropdown shows the dict keys; the handler maps them to checkpoint filenames.
+                roformer_model = gr.Dropdown(label="Model", choices=list(roformer_models.keys()))
+                roformer_button = gr.Button("Separate", variant="primary")
+            roformer_stem1 = gr.Audio(type="filepath", label="Stem 1")
+            roformer_stem2 = gr.Audio(type="filepath", label="Stem 2")
+            roformer_button.click(roformer_separator, [roformer_audio_file, roformer_model], [roformer_stem1, roformer_stem2])
+        with gr.Tab("MDX23C"):
+            mdx23c_audio_file = gr.Audio(label="Audio File", type="filepath")
+            with gr.Row():
+                mdx23c_model = gr.Dropdown(label="Model", choices=mdx23c_models)
+                mdx23c_button = gr.Button("Separate", variant="primary")
+            mdx23c_stem1 = gr.Audio(type="filepath", label="Stem 1")
+            mdx23c_stem2 = gr.Audio(type="filepath", label="Stem 2")
+            # MDX23C, MDX-NET and VR-ARCH all share the same two-stem handler.
+            mdx23c_button.click(mdx_vr_separator, [mdx23c_audio_file, mdx23c_model], [mdx23c_stem1, mdx23c_stem2])
+        with gr.Tab("MDX-NET"):
+            mdxnet_audio_file = gr.Audio(label="Audio File", type="filepath")
+            with gr.Row():
+                mdxnet_model = gr.Dropdown(label="Model", choices=mdxnet_models)
+                mdxnet_button = gr.Button("Separate", variant="primary")
+            mdxnet_stem1 = gr.Audio(type="filepath", label="Stem 1")
+            mdxnet_stem2 = gr.Audio(type="filepath", label="Stem 2")
+            mdxnet_button.click(mdx_vr_separator, [mdxnet_audio_file, mdxnet_model], [mdxnet_stem1, mdxnet_stem2])
+        with gr.Tab("VR-ARCH"):
+            vr_audio_file = gr.Audio(label="Audio File", type="filepath")
+            with gr.Row():
+                vr_model = gr.Dropdown(label="Model", choices=vrarch_models)
+                vr_button = gr.Button("Separate", variant="primary")
+            vr_stem1 = gr.Audio(type="filepath", label="Stem 1")
+            vr_stem2 = gr.Audio(type="filepath", label="Stem 2")
+            vr_button.click(mdx_vr_separator, [vr_audio_file, vr_model], [vr_stem1, vr_stem2])
+        with gr.Tab("Demucs"):
+            demucs_audio_file = gr.Audio(label="Audio File", type="filepath")
+            with gr.Row():
+                demucs_model = gr.Dropdown(label="Model", choices=demucs_models)
+                demucs_button = gr.Button("Separate", variant="primary")
+            # Four outputs, matching the four values returned by demucs_separator.
+            demucs_stem1 = gr.Audio(type="filepath", label="Stem 1")
+            demucs_stem2 = gr.Audio(type="filepath", label="Stem 2")
+            demucs_stem3 = gr.Audio(type="filepath", label="Stem 3")
+            demucs_stem4 = gr.Audio(type="filepath", label="Stem 4")
+            demucs_button.click(demucs_separator, [demucs_audio_file, demucs_model], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
+# Start the app at import time; there is no __main__ guard in this file.
+demo.launch()