Kremon96 committed on
Commit
14b38ac
·
verified ·
1 Parent(s): f652dae

Create rvc_infer.py

Browse files
Files changed (1) hide show
  1. rvc_infer.py +64 -0
rvc_infer.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import torchaudio
6
+ import librosa
7
+ import pyworld as pw
8
+ from scipy.io import wavfile
9
+
10
class RVCModel:
    """Voice-conversion wrapper around a small Conv1d network and WORLD.

    The pipeline implemented here is: analyse the waveform with pyworld
    (f0, spectral envelope, aperiodicity), run the spectral envelope
    through the network, then resynthesise with pyworld using the original
    f0 and aperiodicity.
    """

    def __init__(self, model_path, index_path=None, device='cpu'):
        """Load the network weights and the optional index.

        Args:
            model_path: Path to a state dict readable by ``torch.load``.
            index_path: Path to an array readable by ``np.load``; ``None``
                or a non-existent path leaves ``self.index`` as ``None``.
            device: Torch device the network and its inputs are placed on.
        """
        self.device = device
        self.model = self.load_model(model_path)
        self.index = self.load_index(index_path)
        self.sr = 16000  # sampling rate handed to every pyworld call
        self.hop_length = 160  # stored only; no method of this class reads it

    @staticmethod
    def _to_float64(values):
        """Return *values* as a C-contiguous float64 NumPy array.

        Accepts torch tensors (on any device) as well as array-likes, so
        callers can pass either the tensor f0 from ``extract_features`` or
        plain NumPy arrays.
        """
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.ascontiguousarray(values, dtype=np.float64)

    def load_model(self, path):
        """Build the Conv1d stack, load weights from *path*, return it in eval mode."""
        # SECURITY: torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        state_dict = torch.load(path, map_location=self.device)
        model = nn.Sequential(
            nn.Conv1d(128, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv1d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv1d(512, 128, 3, padding=1),
        )
        model.load_state_dict(state_dict)
        model.eval().to(self.device)
        return model

    def load_index(self, path):
        """Return ``np.load(path)`` when *path* names an existing file, else ``None``."""
        # os.path.exists(None) raises TypeError, so treat a missing path
        # argument the same as a missing file.
        if path is None or not os.path.exists(path):
            return None
        return np.load(path)

    def extract_features(self, audio):
        """Analyse *audio* and return ``(f0, sp, ap)``.

        ``f0`` is returned as a float32 tensor on ``self.device``; ``sp``
        and ``ap`` are returned exactly as ``compute_pyworld`` produced them.
        """
        f0, sp, ap = self.compute_pyworld(audio)
        return torch.from_numpy(f0).float().to(self.device), sp, ap

    def compute_pyworld(self, audio):
        """Run harvest, cheaptrick and d4c on *audio* at ``self.sr``.

        Returns:
            ``(f0, sp, ap)`` as returned by the three pyworld calls.
        """
        audio = np.ascontiguousarray(audio, dtype=np.float64)
        f0, t = pw.harvest(audio, self.sr)
        sp = pw.cheaptrick(audio, f0, t, self.sr)
        ap = pw.d4c(audio, f0, t, self.sr)
        return f0, sp, ap

    def infer(self, audio):
        """Convert *audio* and return the resynthesised float32 waveform."""
        # Use the raw analysis output so f0 stays a float64 NumPy array all
        # the way to synthesis (no tensor/device round trip).
        f0, sp, ap = self.compute_pyworld(audio)

        # (frames, bins) -> (1, bins, frames) as expected by Conv1d.
        # NOTE(review): the first Conv1d takes 128 input channels, so sp
        # must have 128 bins per frame; cheaptrick's default output at
        # 16 kHz is likely wider (513 bins) - confirm how the model was
        # trained before relying on this path.
        frames = torch.from_numpy(sp).float().permute(1, 0).unsqueeze(0)
        frames = frames.to(self.device)

        with torch.no_grad():
            converted = self.model(frames)

        converted = converted.squeeze(0).permute(1, 0).cpu().numpy()
        return self.reconstruct_audio(f0, converted, ap)

    def reconstruct_audio(self, f0, sp, ap):
        """Synthesise a waveform from WORLD parameters.

        Each argument may be a NumPy array or a torch tensor; all are
        converted to C-contiguous float64 arrays before synthesis (the
        transposed network output is not contiguous on its own).

        Returns:
            float32 waveform with NaN/inf replaced by ``np.nan_to_num``.
        """
        y = pw.synthesize(
            self._to_float64(f0).reshape(-1),
            self._to_float64(sp),
            self._to_float64(ap),
            self.sr,
        )
        return np.nan_to_num(y).astype(np.float32)