import os
import subprocess
from pathlib import Path

import gradio as gr
import soundfile as sf

import separate


def audio_model_inference(file, output_folder, model_path, denoise, margin,
                          chunks, n_fft, dim_t, dim_f):
    """Split one audio file into a vocals track and a no-vocals track.

    Builds a ``separate.Predictor`` from the UI settings, runs it on the
    uploaded file, writes both resulting tracks as WAV files into
    ``output_folder`` and hands their paths back to Gradio.

    Args:
        file: Path of the uploaded source audio (the ``gr.File`` input is
            configured with ``type='filepath'``).
        output_folder: Directory that receives the two WAV files; it is
            created when it does not exist yet.
        model_path: Path of the model file forwarded to the predictor.
        denoise: Forwarded to the predictor as ``"denoise"``.
        margin: Forwarded to the predictor as ``"margin"``.
        chunks: Forwarded to the predictor as ``"chunks"``.
        n_fft: Forwarded to the predictor as ``"n_fft"``.
        dim_t: Forwarded to the predictor as ``"dim_t"``.
        dim_f: Forwarded to the predictor as ``"dim_f"``.

    Returns:
        A ``(vocals_path, no_vocals_path)`` tuple of file path strings, one
        per ``gr.Audio`` output component.

    Raises:
        gr.Error: If no source file was supplied, or if an output file is
            missing after writing.
    """
    if not file:
        raise gr.Error("错误:未提供源音频文件。")

    source_path = Path(file)
    output_dir = Path(output_folder)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Run the separation.
    # NOTE(review): the numeric settings come from gr.Number inputs and are
    # passed through untouched; confirm whether Predictor expects ints.
    audio_worker = separate.Predictor(args={
        "files": source_path,
        "output": output_dir,
        "model_path": Path(model_path),
        "denoise": denoise,
        "margin": margin,
        "chunks": chunks,
        "n_fft": n_fft,
        "dim_t": dim_t,
        "dim_f": dim_f
    })
    # NOTE(review): assumes predict() takes the path of the file to process;
    # verify against separate.Predictor.
    vocals, no_vocals, sampling_rate = audio_worker.predict(source_path)

    # Derive both output names from the source file name without its
    # extension, so the files written here are the ones returned below.
    stem = source_path.stem
    vocals_path = output_dir / f"{stem}_vocals.wav"
    no_vocals_path = output_dir / f"{stem}_no_vocals.wav"

    sf.write(str(no_vocals_path), no_vocals, sampling_rate)
    sf.write(str(vocals_path), vocals, sampling_rate)

    # Make sure both files really exist before handing them to the UI.
    if not (vocals_path.exists() and no_vocals_path.exists()):
        raise gr.Error("错误:输出文件未找到。")

    # Return paths instead of raw bytes: the gr.Audio outputs load the files
    # themselves.
    return str(vocals_path), str(no_vocals_path)


# Gradio interface components
inputs = [
    gr.File(label="源音频文件", type='filepath', file_count='single'),
    gr.Textbox(label="输出文件夹", value="./"),
    gr.Textbox(label="模型路径", value="./models/MDX_Net_Models/UVR-MDX-NET-Inst_HQ_3.onnx"),
    gr.Checkbox(label="启用降噪", value=False),
    gr.Number(label="边距", value=0.1),
    gr.Number(label="块大小", value=1024),
    gr.Number(label="FFT大小", value=2048),
    gr.Number(label="时间维度", value=512),
    gr.Number(label="频率维度", value=64)
]

outputs = [gr.Audio(label="人声"), gr.Audio(label="无人声")]

# Build the interface
iface = gr.Interface(
    fn=audio_model_inference,
    inputs=inputs,
    outputs=outputs,
    title="音频分离模型",
    description="上传音频文件并配置参数,使用音频分离模型处理它们。"
)

iface.launch()