Cloning_Box

Running

App Files Files Community

Kremon96 committed 14 days ago

Commit

14b38ac

verified ·

1 Parent(s): f652dae

Create rvc_infer.py

Browse files

Files changed (1) hide show

rvc_infer.py +64 -0

rvc_infer.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import os
+import numpy as np
+import torch
+import torch.nn as nn
+import torchaudio
+import librosa
+import pyworld as pw
+from scipy.io import wavfile
+class RVCModel:
+    def __init__(self, model_path, index_path, device='cpu'):
+        self.device = device
+        self.model = self.load_model(model_path)
+        self.index = self.load_index(index_path)
+        self.sr = 16000
+        self.hop_length = 160
+    def load_model(self, path):
+        state_dict = torch.load(path, map_location=self.device)
+        model = nn.Sequential(
+            nn.Conv1d(128, 512, 3, padding=1),
+            nn.ReLU(),
+            nn.Conv1d(512, 512, 3, padding=1),
+            nn.ReLU(),
+            nn.Conv1d(512, 128, 3, padding=1)
+        )
+        model.load_state_dict(state_dict)
+        model.eval().to(self.device)
+        return model
+    def load_index(self, path):
+        if os.path.exists(path):
+            return np.load(path)
+        return None
+    def extract_features(self, audio):
+        f0, sp, ap = self.compute_pyworld(audio)
+        return torch.from_numpy(f0).float().to(self.device), sp, ap
+    def compute_pyworld(self, audio):
+        audio = audio.astype(np.float64)
+        f0, t = pw.harvest(audio, self.sr)
+        sp = pw.cheaptrick(audio, f0, t, self.sr)
+        ap = pw.d4c(audio, f0, t, self.sr)
+        return f0, sp, ap
+    def infer(self, audio):
+        f0, sp, ap = self.extract_features(audio)
+        sp = torch.from_numpy(sp).float().permute(1,0).unsqueeze(0).to(self.device)
+        with torch.no_grad():
+            converted = self.model(sp)
+        converted = converted.squeeze(0).permute(1,0).cpu().numpy()
+        return self.reconstruct_audio(f0, converted, ap)
+    def reconstruct_audio(self, f0, sp, ap):
+        y = pw.synthesize(
+            f0.flatten().astype(np.float64),
+            sp.astype(np.float64),
+            ap.astype(np.float64),
+            self.sr
+        )
+        return np.nan_to_num(y).astype(np.float32)