EDGAhab committed
Commit 2d2fdc9 · 1 Parent(s): a18c90f

Update app.py

Files changed (1)
  1. app.py +11 -6
app.py CHANGED
@@ -9,6 +9,17 @@ from models import SynthesizerTrn
 from text.symbols import symbols
 from text import text_to_sequence
 
+%matplotlib inline
+import matplotlib.pyplot as plt
+import json
+import math
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+
+from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
+from scipy.io.wavfile import write
+
 def get_text(text, hps):
     text_norm = text_to_sequence(text, hps.data.text_cleaners)
     if hps.data.add_blank:
@@ -16,7 +27,6 @@ def get_text(text, hps):
     text_norm = torch.LongTensor(text_norm)
     return text_norm
 
-
 hps = utils.get_hparams_from_file("configs/biaobei_base.json")
 
 net_g = SynthesizerTrn(
@@ -26,7 +36,6 @@ net_g = SynthesizerTrn(
     **hps.model)
 _ = net_g.eval()
 
-# _ = utils.load_checkpoint("logs/woman_csmsc/G_100000.pth", net_g, None)
 _ = utils.load_checkpoint("G_aatrox.pth", net_g, None)
 
 def vc_fn(input):
@@ -34,10 +43,6 @@ def vc_fn(input):
     with torch.no_grad():
         x_tst = stn_tst.unsqueeze(0)
         x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
-
-        # x_tst = stn_tst.cpu().unsqueeze(0)
-        # x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cpu()
-
         audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()
     sampling_rate = 22050
     return (sampling_rate, audio)
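
Note: the hunks above cover only the model setup and the vc_fn synthesis function; the Gradio wiring that actually serves this Space lies outside the diff. A minimal sketch of how vc_fn is typically exposed in an app.py like this one, assuming a standard gr.Interface setup (the gradio import, labels, and demo variable are illustrative assumptions, not part of this commit):

import gradio as gr

# Hypothetical wiring (not shown in this diff): expose vc_fn as a
# text-to-speech demo. Gradio's Audio output accepts the
# (sampling_rate, numpy_array) tuple that vc_fn already returns.
demo = gr.Interface(
    fn=vc_fn,
    inputs=gr.Textbox(label="Text"),
    outputs=gr.Audio(label="Synthesized audio"),
)

if __name__ == "__main__":
    demo.launch()

Returning the (sampling_rate, audio) tuple directly, as vc_fn does, lets Gradio play the waveform without writing an intermediate wav file.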