Kremon96 committed on
Commit
f652dae
·
verified ·
1 Parent(s): 12cef9b

Create tts.py

Browse files
Files changed (1) hide show
  1. tts.py +29 -0
tts.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from fairseq.checkpoint_utils import load_model_ensemble_and_task
4
+ from fairseq.models.text_to_speech import CodeHiFiGANVocoder
5
+ from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
6
+
7
class TortoiseTTS:
    """Text-to-speech wrapper around a fairseq TTS checkpoint and a vocoder.

    The acoustic model is loaded from ``checkpoints/tts_model.pt`` and the
    vocoder from ``checkpoints/vocoder.pt`` (both paths are relative to the
    current working directory). Synthesis goes through fairseq's
    ``TTSHubInterface`` helpers.
    """

    def __init__(self, device='cpu'):
        """Load the model and vocoder and build the speech generator.

        Args:
            device: Torch device (e.g. ``'cpu'`` or ``'cuda'``) that the model,
                the vocoder and the input tensors are moved to.
        """
        self.device = device
        self.models, self.cfg, self.task = load_model_ensemble_and_task(
            ["checkpoints/tts_model.pt"],
            arg_overrides={"vocoder": "hifigan", "fp16": False},
        )
        self.model = self.models[0].to(device)
        self.vocoder = CodeHiFiGANVocoder(
            "checkpoints/vocoder.pt", model_cfg=self.cfg
        ).to(device)
        TTSHubInterface.update_cfg_with_data_cfg(self.cfg, self.task.data_cfg)
        # TTSHubInterface.get_prediction() needs a generator exposing
        # generate(model, sample); a bare vocoder does not provide that, so the
        # generator is built by the task with our vocoder plugged in.
        # NOTE(review): CodeHiFiGANVocoder is normally used for unit-to-speech
        # models; confirm it matches the output of this TTS checkpoint.
        self.generator = self.task.build_generator(
            [self.model], self.cfg, vocoder=self.vocoder
        )

    def _resolve_speaker(self, speaker):
        """Translate *speaker* into the integer id fairseq expects, or None."""
        if speaker is None or isinstance(speaker, int):
            return speaker
        # Speaker names are looked up in the task's name -> id table when the
        # checkpoint ships one; unknown names fall back to None so fairseq
        # applies its own default speaker selection.
        speaker_to_id = getattr(self.task, "speaker_to_id", None) or {}
        return speaker_to_id.get(speaker)

    def text_to_speech(self, text, speaker='russian'):
        """Synthesize *text* and return the waveform with its sample rate.

        Args:
            text: Input text to synthesize.
            speaker: Speaker name or integer speaker id. A name is resolved
                through the task's speaker table; if it cannot be resolved,
                fairseq's default speaker choice is used.

        Returns:
            A ``(waveform, sample_rate)`` tuple where ``waveform`` is a NumPy
            array and ``sample_rate`` is the rate reported by the task.
        """
        sample = TTSHubInterface.get_model_input(
            self.task, text, speaker=self._resolve_speaker(speaker)
        )

        # Move every tensor the generator reads to the model's device, not only
        # the source tokens, to avoid CPU/GPU mismatches.
        net_input = sample['net_input']
        for key in ('src_tokens', 'src_lengths'):
            if net_input.get(key) is not None:
                net_input[key] = net_input[key].to(self.device)
        if sample.get('speaker') is not None:
            sample['speaker'] = sample['speaker'].to(self.device)

        with torch.no_grad():
            # Signature is get_prediction(task, model, generator, sample);
            # it accepts no ``speaker`` keyword (the speaker lives in sample).
            wav, rate = TTSHubInterface.get_prediction(
                self.task,
                self.model,
                self.generator,
                sample,
            )
        return wav.cpu().numpy(), rate