# coding=utf-8
"""Gradio demo that converts an uploaded vocal clip with a so-vits-svc model.

The model is loaded once at import time; running the file as a script builds
the web UI and starts the Gradio server.
"""
import io
import logging
import time

import gradio as gr
import librosa
import numpy as np
import soundfile

from inference.infer_tool import Svc

# numba is noisy at INFO/DEBUG level; only keep its warnings.
logging.getLogger("numba").setLevel(logging.WARNING)

model_path = "model/model.pth"
config_path = "model/config.json"
svc_model = Svc(model_path, config_path)

# Sample rate (Hz) the input is resampled to before being handed to the model.
MODEL_INPUT_SR = 24000
# Sample rate (Hz) reported to Gradio for the generated audio.
OUTPUT_SR = 48000
# Longest clip (seconds) accepted by the demo.
MAX_DURATION_S = 30
# Speaker id passed to the model.
SPEAKER_ID = 'xiaoke'
# DOM id shared by the output audio component and the download script.
AUDIO_ELEM_ID = "vc-audio"


def _to_model_wav(sampling_rate, audio):
    """Return an in-memory mono WAV at ``MODEL_INPUT_SR`` built from *audio*.

    Integer PCM is scaled by the maximum value of its dtype; floating-point
    input is only cast to float32 (``np.iinfo`` would raise on it).
    """
    if np.issubdtype(audio.dtype, np.integer):
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        audio = audio.astype(np.float32)

    # Multi-channel input is indexed (samples, channels) here, while
    # librosa.to_mono expects channels first, hence the transpose.
    if audio.ndim > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))

    if sampling_rate != MODEL_INPUT_SR:
        audio = librosa.resample(
            audio, orig_sr=sampling_rate, target_sr=MODEL_INPUT_SR
        )

    wav_buffer = io.BytesIO()
    soundfile.write(wav_buffer, audio, MODEL_INPUT_SR, format="wav")
    wav_buffer.seek(0)  # rewind so the consumer reads from the start
    return wav_buffer


def sovits(input_audio, vc_transform):
    """Convert an uploaded clip and report the outcome to the UI.

    Args:
        input_audio: ``(sampling_rate, samples)`` pair, or ``None`` when
            nothing was uploaded.
        vc_transform: value of the UI's "音高调整" field, forwarded unchanged
            to ``svc_model.infer``.

    Returns:
        A 3-tuple ``(message, audio, extra_info)``. On rejection ``audio`` and
        ``extra_info`` are ``None``; on success ``audio`` is
        ``(OUTPUT_SR, ndarray)`` and ``extra_info`` reports the elapsed time.
    """
    start = time.perf_counter()

    if input_audio is None:
        return "请上传音频", None, None

    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > MAX_DURATION_S:
        return "请上传小于30s的音频,长音频的转换请在本地进行", None, None

    wav_buffer = _to_model_wav(sampling_rate, audio)

    # NOTE(review): the second value returned by infer() is ignored and the
    # output rate is hard-coded, exactly as before - confirm it matches the
    # model's real output rate.
    out_audio, _ = svc_model.infer(SPEAKER_ID, vc_transform, wav_buffer)
    _audio = out_audio.cpu().numpy()

    elapsed = round(time.perf_counter() - start, 2)
    return "生成成功!", (OUTPUT_SR, _audio), f"生成耗时 {elapsed} s"


# Browser-side script for the download button. Doubled braces are literal
# braces: the template goes through str.format() to inject the element id.
download_audio_js = """
() =>{{
    let root = document.querySelector("body > gradio-app");
    if (root.shadowRoot != null)
        root = root.shadowRoot;
    let audio = root.querySelector("#{audio_id}").querySelector("audio");
    if (audio == undefined)
        return;
    audio = audio.src;
    let oA = document.createElement("a");
    oA.download = Math.floor(Math.random()*100000000)+'.wav';
    oA.href = audio;
    document.body.appendChild(oA);
    oA.click();
    oA.remove();
}}
"""


if __name__ == '__main__':
    with gr.Blocks() as app:
        # NOTE(review): these literals were split across physical lines in
        # the reviewed copy, which looks like stripped inline markup. Only the
        # visible text is reproduced; restore any wrapper tags from upstream.
        gr.Markdown(
            "# AI小可\n"
            "\n"
            "输入音频应为尽可能干净的人声\n"
            "\n"
            "可使用UVR5/demucs分离人声和BGM\n"
        )
        with gr.Row():
            with gr.Column():
                input_audio = gr.inputs.Audio(label="待转换音频")
                vc = gr.inputs.Number(label="音高调整", default=0)
                btn = gr.Button(value="生成")
            with gr.Column():
                o1 = gr.Textbox(label="Output Message")
                o2 = gr.Audio(label="Output Audio", elem_id=AUDIO_ELEM_ID)
                o3 = gr.Textbox(label="Extra Info")
                download = gr.Button("Download Audio")
        btn.click(sovits, inputs=[input_audio, vc], outputs=[o1, o2, o3])
        # No Python callback: the click only runs the browser-side script.
        download.click(
            None,
            [],
            [],
            _js=download_audio_js.format(audio_id=AUDIO_ELEM_ID),
        )
        with gr.Row():
            gr.Examples(
                label="示例",
                examples=[
                    ["examples/你够不够我这样洒脱.wav", 0],
                    ["examples/CREAM - Girl Like Me_(Vocals).wav", 0],
                    ["examples/何嘉嘉Gaga - 难念的经_(Vocals).wav", 0],
                    ["examples/何嘉嘉Gaga - 难念的经2_(Vocals).wav", 0],
                ],
                inputs=[input_audio, vc],
                outputs=[o1, o2, o3],
                fn=sovits,
                cache_examples=True,
            )
    # A single worker so requests are processed one at a time.
    app.queue(concurrency_count=1).launch()