File size: 946 Bytes
bbdb87d
 
 
 
 
04ea122
bbdb87d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04ea122
 
c7380eb
bbdb87d
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import time

import scipy.io.wavfile
import torch
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none

# Identifiers of the pretrained English model and its vocoder.
# NOTE(review): presumably str_or_none turns the literal "none" into None so
# that no separate vocoder is loaded -- confirm against espnet2.utils.types.
tagen = "kan-bayashi/ljspeech_vits"
vocoder_tagen = "none"

# Decoding options, grouped by the model families they are meant for.
# NOTE(review): the model tag names a VITS model, so presumably only the
# FastSpeech/VITS-related entries have an effect -- confirm.
_decode_options_en = {
    # Only for Tacotron 2 & Transformer
    "threshold": 0.5,
    # Only for Tacotron 2
    "minlenratio": 0.0,
    "maxlenratio": 10.0,
    "use_att_constraint": False,
    "backward_window": 1,
    "forward_window": 3,
    # Only for FastSpeech & FastSpeech2 & VITS
    "speed_control_alpha": 1.0,
    # Only for VITS
    "noise_scale": 0.333,
    "noise_scale_dur": 0.333,
}

# Shared English synthesizer, built once at import time and run on the CPU.
text2speechen = Text2Speech.from_pretrained(
    model_tag=str_or_none(tagen),
    vocoder_tag=str_or_none(vocoder_tagen),
    device="cpu",
    **_decode_options_en,
)


def inference(text: str, lang: str) -> str:
    """Synthesize *text* to speech and save it as a WAV file.

    Args:
        text: The text to synthesize.
        lang: Language selector; only ``"english"`` is supported.

    Returns:
        The path of the written WAV file (``"./audio/out.wav"``).

    Raises:
        ValueError: If *lang* is not ``"english"``. Previously the function
            returned the output path without writing anything, handing the
            caller a stale or non-existent file.
    """
    if lang != "english":
        raise ValueError(
            f"Unsupported language {lang!r}; only 'english' is available."
        )

    output_path = "./audio/out.wav"
    # wavfile.write does not create missing directories, so ensure it exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with torch.no_grad():
        wav = text2speechen(text)["wav"]
        # Flatten to 1-D and move to host memory before handing it to scipy.
        samples = wav.view(-1).cpu().numpy()

    # The synthesizer's ``fs`` attribute is used as the WAV sample rate.
    scipy.io.wavfile.write(output_path, text2speechen.fs, samples)
    return output_path