Politrees committed on
Commit
6542672
·
verified ·
1 Parent(s): 6334383

Delete PolUVR_app.py

Browse files
Files changed (1) hide show
  1. PolUVR_app.py +0 -606
PolUVR_app.py DELETED
@@ -1,606 +0,0 @@
1
- import os
2
- import torch
3
- import shutil
4
- import logging
5
- import gradio as gr
6
-
7
- from PolUVR.separator import Separator
8
-
9
# Prefer GPU when available; autocast (mixed precision) is only enabled on CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
use_autocast = device == "cuda"

#=========================#
#     Roformer Models     #
#=========================#
# UI display name -> checkpoint filename. Dict order defines the dropdown order.
ROFORMER_MODELS = {
    'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
    'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
    'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
    'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',
    'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
    'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',
    'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
    'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',
    'Mel-Roformer-Denoise-Aufr33-Aggr': 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',
    'MelBand Roformer Kim | Inst V1 by Unwa': 'melband_roformer_inst_v1.ckpt',
    'MelBand Roformer Kim | Inst V2 by Unwa': 'melband_roformer_inst_v2.ckpt',
    'MelBand Roformer Kim | InstVoc Duality V1 by Unwa': 'melband_roformer_instvoc_duality_v1.ckpt',
    # NOTE(review): "instvox" (not "instvoc") below looks like a typo but may be the
    # actual published filename — confirm against the model repository before changing.
    'MelBand Roformer Kim | InstVoc Duality V2 by Unwa': 'melband_roformer_instvox_duality_v2.ckpt',
}
#=========================#
#      MDX23C Models      #
#=========================#
# Checkpoint filenames offered in the MDX23C tab dropdown.
MDX23C_MODELS = [
    'MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
    'MDX23C_D1581.ckpt',
]
#=========================#
#      MDX-NET Models     #
#=========================#
# ONNX model filenames offered in the MDX-NET tab dropdown.
MDXNET_MODELS = [
    'UVR-MDX-NET-Inst_1.onnx',
    'UVR-MDX-NET-Inst_2.onnx',
    'UVR-MDX-NET-Inst_3.onnx',
    'UVR-MDX-NET-Inst_HQ_1.onnx',
    'UVR-MDX-NET-Inst_HQ_2.onnx',
    'UVR-MDX-NET-Inst_HQ_3.onnx',
    'UVR-MDX-NET-Inst_HQ_4.onnx',
    'UVR-MDX-NET-Inst_HQ_5.onnx',
    'UVR-MDX-NET_Inst_82_beta.onnx',
    'UVR-MDX-NET_Inst_90_beta.onnx',
    'UVR-MDX-NET_Inst_187_beta.onnx',
    'UVR-MDX-NET-Inst_full_292.onnx',
    'UVR-MDX-NET_Main_340.onnx',
    'UVR-MDX-NET_Main_390.onnx',
    'UVR-MDX-NET_Main_406.onnx',
    'UVR-MDX-NET_Main_427.onnx',
    'UVR-MDX-NET_Main_438.onnx',
    'UVR-MDX-NET-Crowd_HQ_1.onnx',
    'UVR-MDX-NET-Voc_FT.onnx',
    'UVR_MDXNET_1_9703.onnx',
    'UVR_MDXNET_2_9682.onnx',
    'UVR_MDXNET_3_9662.onnx',
    'UVR_MDXNET_9482.onnx',
    'UVR_MDXNET_KARA.onnx',
    'UVR_MDXNET_KARA_2.onnx',
    'UVR_MDXNET_Main.onnx',
    'kuielab_a_bass.onnx',
    'kuielab_a_drums.onnx',
    'kuielab_a_other.onnx',
    'kuielab_a_vocals.onnx',
    'kuielab_b_bass.onnx',
    'kuielab_b_drums.onnx',
    'kuielab_b_other.onnx',
    'kuielab_b_vocals.onnx',
    'Kim_Inst.onnx',
    'Kim_Vocal_1.onnx',
    'Kim_Vocal_2.onnx',
    'Reverb_HQ_By_FoxJoy.onnx',
]
#========================#
#     VR-ARCH Models     #
#========================#
# PyTorch model filenames offered in the VR ARCH tab dropdown.
VR_ARCH_MODELS = [
    '1_HP-UVR.pth',
    '2_HP-UVR.pth',
    '3_HP-Vocal-UVR.pth',
    '4_HP-Vocal-UVR.pth',
    '5_HP-Karaoke-UVR.pth',
    '6_HP-Karaoke-UVR.pth',
    '7_HP2-UVR.pth',
    '8_HP2-UVR.pth',
    '9_HP2-UVR.pth',
    '10_SP-UVR-2B-32000-1.pth',
    '11_SP-UVR-2B-32000-2.pth',
    '12_SP-UVR-3B-44100.pth',
    '13_SP-UVR-4B-44100-1.pth',
    '14_SP-UVR-4B-44100-2.pth',
    '15_SP-UVR-MID-44100-1.pth',
    '16_SP-UVR-MID-44100-2.pth',
    '17_HP-Wind_Inst-UVR.pth',
    'MGM_HIGHEND_v4.pth',
    'MGM_LOWEND_A_v4.pth',
    'MGM_LOWEND_B_v4.pth',
    'MGM_MAIN_v4.pth',
    'UVR-BVE-4B_SN-44100-1.pth',
    'UVR-DeEcho-DeReverb.pth',
    'UVR-De-Echo-Aggressive.pth',
    'UVR-De-Echo-Normal.pth',
    'UVR-DeNoise-Lite.pth',
    'UVR-DeNoise.pth',
]
#=======================#
#     DEMUCS Models     #
#=======================#
# Demucs config filenames; 'htdemucs_6s.yaml' is the only 6-stem variant
# (see the special-casing in demucs_separator / update_stems).
DEMUCS_MODELS = [
    'hdemucs_mmi.yaml',
    'htdemucs.yaml',
    'htdemucs_6s.yaml',
    'htdemucs_ft.yaml',
]
122
-
123
def print_message(input_file, model_name):
    """Print a short banner describing the upcoming separation run."""
    track = os.path.splitext(os.path.basename(input_file))[0]
    # Single write; the leading "\n" entry reproduces the original blank lines.
    banner = "\n".join([
        "\n",
        "🎵 PolUVR 🎵",
        f"Input audio: {track}",
        f"Separation Model: {model_name}",
        "Audio Separation Process...",
    ])
    print(banner)
131
-
132
def prepare_output_dir(input_file, output_dir):
    """Create a directory for the output files and clean it if it already exists.

    The directory is named after the input file's base name (extension stripped)
    and placed under `output_dir`. Any previous contents are removed so each run
    starts from an empty directory.

    Returns the absolute/joined path of the prepared directory.
    Raises RuntimeError if the directory cannot be removed or created.
    """
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    out_dir = os.path.join(output_dir, base_name)
    try:
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)
    except Exception as e:
        # Chain the cause (`from e`) — the separator functions in this file all
        # chain their exceptions; this one previously dropped the traceback context.
        raise RuntimeError(f"Failed to prepare output directory {out_dir}: {e}") from e
    return out_dir
143
-
144
def rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem):
    """Expand the `{base_name}` placeholder in every stem-name template.

    Returns a dict mapping stem kind ("Vocals", "Instrumental", ...) to the
    resolved output file name for the given input audio.
    """
    base_name = os.path.splitext(os.path.basename(audio))[0]
    templates = {
        "Vocals": vocals_stem,
        "Instrumental": instrumental_stem,
        "Drums": drums_stem,
        "Bass": bass_stem,
        "Other": other_stem,
        "Guitar": guitar_stem,
        "Piano": piano_stem,
    }
    return {stem: template.replace("{base_name}", base_name) for stem, template in templates.items()}
156
-
157
def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using a Roformer model.

    Returns the paths of the first two separated stems.
    Raises RuntimeError (with the original cause chained) on any failure.
    """
    stem_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model_key)
    checkpoint = ROFORMER_MODELS[model_key]
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        # Roformer checkpoints are configured through the MDXC parameter set.
        arch_params = {
            "segment_size": seg_size,
            "override_model_segment_size": override_seg_size,
            "batch_size": batch_size,
            "overlap": overlap,
            "pitch_shift": pitch_shift,
        }
        sep = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params=arch_params,
        )

        progress(0.2, desc="Model loaded...")
        sep.load_model(model_filename=checkpoint)

        progress(0.7, desc="Audio separated...")
        result_files = sep.separate(audio, stem_names)
        print(f"Separation complete!\nResults: {', '.join(result_files)}")

        paths = [os.path.join(out_dir, name) for name in result_files]
        return paths[0], paths[1]
    except Exception as e:
        raise RuntimeError(f"Roformer separation failed: {e}") from e
192
-
193
def mdx23c_separator(audio, model, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using an MDX23C model.

    Returns the paths of the first two separated stems.
    Raises RuntimeError (with the original cause chained) on any failure.
    """
    stem_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        arch_params = {
            "segment_size": seg_size,
            "override_model_segment_size": override_seg_size,
            "batch_size": batch_size,
            "overlap": overlap,
            "pitch_shift": pitch_shift,
        }
        sep = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params=arch_params,
        )

        progress(0.2, desc="Model loaded...")
        sep.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        result_files = sep.separate(audio, stem_names)
        print(f"Separation complete!\nResults: {', '.join(result_files)}")

        paths = [os.path.join(out_dir, name) for name in result_files]
        return paths[0], paths[1]
    except Exception as e:
        raise RuntimeError(f"MDX23C separation failed: {e}") from e
227
-
228
def mdx_separator(audio, model, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using an MDX-NET model.

    Returns the paths of the first two separated stems.
    Raises RuntimeError (with the original cause chained) on any failure.
    """
    stem_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        arch_params = {
            "hop_length": hop_length,
            "segment_size": seg_size,
            "overlap": overlap,
            "batch_size": batch_size,
            "enable_denoise": denoise,
        }
        sep = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdx_params=arch_params,
        )

        progress(0.2, desc="Model loaded...")
        sep.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        result_files = sep.separate(audio, stem_names)
        print(f"Separation complete!\nResults: {', '.join(result_files)}")

        paths = [os.path.join(out_dir, name) for name in result_files]
        return paths[0], paths[1]
    except Exception as e:
        raise RuntimeError(f"MDX-NET separation failed: {e}") from e
262
-
263
def vr_separator(audio, model, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using a VR ARCH model.

    Returns the paths of the first two separated stems.
    Raises RuntimeError (with the original cause chained) on any failure.
    """
    stem_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        arch_params = {
            "batch_size": batch_size,
            "window_size": window_size,
            "aggression": aggression,
            "enable_tta": tta,
            "enable_post_process": post_process,
            "post_process_threshold": post_process_threshold,
            "high_end_process": high_end_process,
        }
        sep = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            vr_params=arch_params,
        )

        progress(0.2, desc="Model loaded...")
        sep.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        result_files = sep.separate(audio, stem_names)
        print(f"Separation complete!\nResults: {', '.join(result_files)}")

        paths = [os.path.join(out_dir, name) for name in result_files]
        return paths[0], paths[1]
    except Exception as e:
        raise RuntimeError(f"VR ARCH separation failed: {e}") from e
299
-
300
def demucs_separator(audio, model, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using a Demucs model.

    Returns six stem paths; for 4-stem models the last two slots are None.
    Raises RuntimeError (with the original cause chained) on any failure.
    """
    stem_names = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        arch_params = {
            "segment_size": seg_size,
            "shifts": shifts,
            "overlap": overlap,
            "segments_enabled": segments_enabled,
        }
        sep = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            demucs_params=arch_params,
        )

        progress(0.2, desc="Model loaded...")
        sep.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        result_files = sep.separate(audio, stem_names)
        print(f"Separation complete!\nResults: {', '.join(result_files)}")

        paths = [os.path.join(out_dir, name) for name in result_files]

        # Only htdemucs_6s produces the extra guitar/piano stems.
        if model == "htdemucs_6s.yaml":
            return paths[0], paths[1], paths[2], paths[3], paths[4], paths[5]
        return paths[0], paths[1], paths[2], paths[3], None, None
    except Exception as e:
        raise RuntimeError(f"Demucs separation failed: {e}") from e
337
-
338
def update_stems(model):
    """Toggle visibility of the extra stem row: visible only for the 6-stem Demucs model."""
    return gr.update(visible=model == "htdemucs_6s.yaml")
343
-
344
# Gradio UI definition. Component creation order and the inputs= lists below are
# position-sensitive: each click handler's inputs must match the corresponding
# separator function's parameter order exactly.
with gr.Blocks(
    title="🎵 PolUVR 🎵",
    css="footer{display:none !important}",  # hide the default Gradio footer
    theme=gr.themes.Default(
        spacing_size="sm",
        radius_size="lg",
    )
) as app:
    gr.HTML("<h1> 🎵 PolUVR 🎵 </h1>")

    # --- Roformer tab: model choice + MDXC-style parameters, 2 output stems ---
    with gr.Tab("Roformer"):
        with gr.Group():
            with gr.Row():
                roformer_model = gr.Dropdown(label="Select the Model", choices=list(ROFORMER_MODELS.keys()))
            with gr.Row():
                roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
                roformer_pitch_shift = gr.Slider(minimum=-12, maximum=12, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
        with gr.Row():
            roformer_audio = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            roformer_button = gr.Button("Separate!", variant="primary")
        with gr.Row():
            roformer_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
            roformer_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- MDX23C tab: same parameter set as Roformer, 2 output stems ---
    with gr.Tab("MDX23C"):
        with gr.Group():
            with gr.Row():
                mdx23c_model = gr.Dropdown(label="Select the Model", choices=MDX23C_MODELS)
            with gr.Row():
                mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
                mdx23c_pitch_shift = gr.Slider(minimum=-12, maximum=12, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
        with gr.Row():
            mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            mdx23c_button = gr.Button("Separate!", variant="primary")
        with gr.Row():
            mdx23c_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
            mdx23c_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- MDX-NET tab: hop length / segment / overlap / denoise, 2 output stems ---
    with gr.Tab("MDX-NET"):
        with gr.Group():
            with gr.Row():
                mdx_model = gr.Dropdown(label="Select the Model", choices=MDXNET_MODELS)
            with gr.Row():
                mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
                mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
                mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
        with gr.Row():
            mdx_audio = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            mdx_button = gr.Button("Separate!", variant="primary")
        with gr.Row():
            mdx_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
            mdx_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- VR ARCH tab: window/aggression/TTA/post-process controls, 2 output stems ---
    with gr.Tab("VR ARCH"):
        with gr.Group():
            with gr.Row():
                vr_model = gr.Dropdown(label="Select the Model", choices=VR_ARCH_MODELS)
            with gr.Row():
                vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
                # NOTE(review): "Agression" is a user-visible typo ("Aggression"); left
                # unchanged here because it is runtime UI text, not a comment.
                vr_aggression = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
                vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
                vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
                vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
                vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
        with gr.Row():
            vr_audio = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            vr_button = gr.Button("Separate!", variant="primary")
        with gr.Row():
            vr_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
            vr_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- Demucs tab: up to 6 output stems (row 5/6 hidden unless htdemucs_6s) ---
    with gr.Tab("Demucs"):
        with gr.Group():
            with gr.Row():
                demucs_model = gr.Dropdown(label="Select the Model", choices=DEMUCS_MODELS)
            with gr.Row():
                demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
                demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
                demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
                demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
        with gr.Row():
            demucs_audio = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            demucs_button = gr.Button("Separate!", variant="primary")
        with gr.Row():
            demucs_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
            demucs_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
        with gr.Row():
            demucs_stem3 = gr.Audio(label="Stem 3", type="filepath", interactive=False)
            demucs_stem4 = gr.Audio(label="Stem 4", type="filepath", interactive=False)
        # Hidden by default; made visible by update_stems when htdemucs_6s is selected.
        with gr.Row(visible=False) as stem6:
            demucs_stem5 = gr.Audio(label="Stem 5", type="filepath", interactive=False)
            demucs_stem6 = gr.Audio(label="Stem 6", type="filepath", interactive=False)

    # --- Settings tab: shared options consumed by every separator handler ---
    with gr.Tab("Settings"):
        with gr.Accordion("General settings", open=False):
            with gr.Group():
                model_file_dir = gr.Textbox(value="/tmp/PolUVR-models/", label="Directory to cache model files", info="The directory where model files are stored.", placeholder="/tmp/PolUVR-models/")
                with gr.Row():
                    output_dir = gr.Textbox(value="output", label="File output directory", info="The directory where output files will be saved.", placeholder="output")
                    output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.")
                with gr.Row():
                    norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
                    amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
                with gr.Row():
                    batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")

        # Stem-name templates; "{base_name}" is substituted by rename_stems().
        with gr.Accordion("Rename Stems", open=False):
            gr.HTML("<h3> `{base_name}` - input file name </h3>")
            with gr.Row():
                vocals_stem = gr.Textbox(value="{base_name}_(Vocals)", label="Vocals Stem", placeholder="{base_name}_(Vocals)")
                instrumental_stem = gr.Textbox(value="{base_name}_(Instrumental)", label="Instrumental Stem", placeholder="{base_name}_(Instrumental)")
                other_stem = gr.Textbox(value="{base_name}_(Other)", label="Other Stem", placeholder="{base_name}_(Other)")
            with gr.Row():
                drums_stem = gr.Textbox(value="{base_name}_(Drums)", label="Drums Stem", placeholder="{base_name}_(Drums)")
                bass_stem = gr.Textbox(value="{base_name}_(Bass)", label="Bass Stem", placeholder="{base_name}_(Bass)")
            with gr.Row():
                guitar_stem = gr.Textbox(value="{base_name}_(Guitar)", label="Guitar Stem", placeholder="{base_name}_(Guitar)")
                piano_stem = gr.Textbox(value="{base_name}_(Piano)", label="Piano Stem", placeholder="{base_name}_(Piano)")

    # Show/hide the 5th/6th Demucs stem row depending on the selected model.
    demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)

    # Event wiring: each inputs list mirrors its separator's positional signature.
    roformer_button.click(
        roformer_separator,
        inputs=[
            roformer_audio,
            roformer_model,
            roformer_seg_size,
            roformer_override_seg_size,
            roformer_overlap,
            roformer_pitch_shift,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[roformer_stem1, roformer_stem2],
    )
    mdx23c_button.click(
        mdx23c_separator,
        inputs=[
            mdx23c_audio,
            mdx23c_model,
            mdx23c_seg_size,
            mdx23c_override_seg_size,
            mdx23c_overlap,
            mdx23c_pitch_shift,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[mdx23c_stem1, mdx23c_stem2],
    )
    mdx_button.click(
        mdx_separator,
        inputs=[
            mdx_audio,
            mdx_model,
            mdx_hop_length,
            mdx_seg_size,
            mdx_overlap,
            mdx_denoise,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[mdx_stem1, mdx_stem2],
    )
    vr_button.click(
        vr_separator,
        inputs=[
            vr_audio,
            vr_model,
            vr_window_size,
            vr_aggression,
            vr_tta,
            vr_post_process,
            vr_post_process_threshold,
            vr_high_end_process,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[vr_stem1, vr_stem2],
    )
    # Demucs takes no batch_size and returns six outputs (last two may be None).
    demucs_button.click(
        demucs_separator,
        inputs=[
            demucs_audio,
            demucs_model,
            demucs_seg_size,
            demucs_shifts,
            demucs_overlap,
            demucs_segments_enabled,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4, demucs_stem5, demucs_stem6],
    )
601
-
602
def main():
    """Entry point: launch the Gradio app with a public share link."""
    # share=True requests a temporary public *.gradio.live URL in addition
    # to the local server.
    app.launch(share=True)

if __name__ == "__main__":
    main()