diff-SVC / app.py
10kwon
우타우타우타우인줄~
362ca75
import gradio as gr
from pathlib import Path
from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth, get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import librosa
import utils
from infer import *
import logging
import tempfile
from infer_tools.infer_tool import *
# Inference settings. infer() reads these module-level names and forwards
# them to run_clip (pndm_speedup is passed as the ``acc`` argument).
pndm_speedup = 20
add_noise_step = 500
thre = 0.05
use_crepe = False
use_pe = False
use_gt_mel = False

# Load the model once at import time so every request reuses it.
project_name = "duckaloid"
model_path = "model_ckpt_steps_50000.ckpt"
config_path = "config.yaml"
hubert_gpu = False
svc_model = Svc(project_name, config_path, hubert_gpu, model_path)
def infer(audios, key):
    """Run voice conversion on one recorded clip.

    Args:
        audios: Audio from the Gradio input declared with ``type="numpy"``,
            unpacked here as a ``(sample_rate, samples)`` pair.
        key: Value forwarded to ``run_clip`` as its ``key`` argument.

    Returns:
        The third value returned by ``run_clip`` (bound to ``audio`` below).

    Raises:
        ValueError: If no audio was submitted (``audios`` is ``None``).
    """
    if audios is None:
        raise ValueError("No audio input was provided.")

    # Imported locally so the file's import block does not have to change.
    # ``librosa.output.write_wav`` no longer exists in librosa >= 0.8, so the
    # recording is serialised with scipy instead.
    from scipy.io import wavfile

    sample_rate, samples = audios

    # Save the recording into a fresh temporary directory so concurrent
    # requests never overwrite each other's input file.
    input_path = Path(tempfile.mkdtemp()) / "input.wav"
    wavfile.write(str(input_path), sample_rate, samples)

    # Run inference on the file that was just written (not on a relative
    # "input.wav" in the current working directory).
    _f0_tst, _f0_pred, audio = run_clip(
        svc_model,
        file_path=str(input_path),
        key=key,
        acc=pndm_speedup,
        use_crepe=use_crepe,
        use_pe=use_pe,
        thre=thre,
        use_gt_mel=use_gt_mel,
        add_noise_step=add_noise_step,
        project_name=project_name,
        out_path="output.wav",
    )

    # NOTE(review): the Gradio "audio" output is documented to take either a
    # file path or a (sample_rate, samples) tuple - confirm that what
    # run_clip returns here is in one of those forms.
    return audio
# Gradio UI: a microphone recording plus a key slider go into infer(), and
# its result is shown in an audio output component.
iface = gr.Interface(
    fn=infer,
    inputs=[
        # gr.Audio replaces the deprecated gr.inputs.Audio namespace and
        # matches the top-level gr.Slider used for the second input.
        gr.Audio(source="microphone", type="numpy", label="Audio Input"),
        # Start at 0 (no transposition) explicitly instead of relying on the
        # component's implicit initial value.
        gr.Slider(minimum=-12, maximum=12, step=1, value=0),
    ],
    outputs="audio",
)
iface.launch()