import gradio as gr
from pathlib import Path

from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth, get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import librosa
import utils
import torchcrepe
from infer import *
import logging

import tempfile
import soundfile as sf

from infer_tools.infer_tool import *
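
# Note: `utils`, `preprocessing`, `infer` and `infer_tools` are project-local
# modules; this script assumes it runs inside a diff-svc checkout, with the
# checkpoint and config placed alongside it.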

# Load the model
project_name = "duckaloid"
model_path = "model_ckpt_steps_50000.ckpt"
config_path = "config.yaml"
hubert_gpu = False
svc_model = Svc(project_name, config_path, hubert_gpu, model_path)

pndm_speedup = 20     # PNDM acceleration factor for diffusion sampling
add_noise_step = 500  # diffusion step to noise to when starting from a ground-truth mel
thre = 0.05           # CREPE confidence threshold; quieter frames count as unvoiced
use_crepe = False     # use CREPE instead of parselmouth for F0 extraction
use_pe = False        # predict F0 from the generated mel with the pitch extractor
use_gt_mel = False    # "shallow diffusion": start sampling from the input's mel
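
# Sketch: enabling shallow diffusion from the source mel (hedged: semantics
# follow the diff-svc README and are not verified against this checkpoint):
#
#   use_gt_mel = True      # start sampling from the input's own mel spectrogram
#   add_noise_step = 500   # 1..1000; smaller keeps the result closer to the source voice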


def infer(audios, key):
    # Gradio's numpy audio input arrives as a (sample_rate, data) tuple
    sr, data = audios

    # save the recording to a temporary wav file for the inference pipeline
    # (librosa.output.write_wav was removed in librosa 0.8, so use soundfile)
    file_path = Path(tempfile.mkdtemp()) / "input.wav"
    sf.write(file_path, data, sr)

    # run the voice conversion; run_clip also writes the result to out_path
    f0_tst, f0_pred, audio = run_clip(svc_model, file_path=str(file_path), key=key, acc=pndm_speedup,
                                      use_crepe=use_crepe, use_pe=use_pe, thre=thre,
                                      use_gt_mel=use_gt_mel, add_noise_step=add_noise_step,
                                      project_name=project_name, out_path="output.wav")

    # return the rendered file for the audio output component
    return "output.wav"
    
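# Alternative to returning the file path above (assumption: `audio` is a
# waveform array at the model's sample rate): hand Gradio a (rate, array)
# tuple directly from infer():
#
#   return hparams["audio_sample_rate"], audio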

iface = gr.Interface(
    fn=infer,
    inputs=[gr.Audio(source="microphone", type="numpy", label="Audio Input"),
            gr.Slider(minimum=-12, maximum=12, step=1, label="Key shift (semitones)")],
    outputs="audio")
iface.launch()
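
# Optional: Gradio can open a public share tunnel if the demo needs to be
# reachable remotely; a hedged alternative launch:
#
#   iface.launch(share=True)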