Spaces:
Build error
Build error
Create tts.py
Browse files
tts.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torchaudio
|
3 |
+
from fairseq.checkpoint_utils import load_model_ensemble_and_task
|
4 |
+
from fairseq.models.text_to_speech import CodeHiFiGANVocoder
|
5 |
+
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
|
6 |
+
|
7 |
+
class TortoiseTTS:
    """Thin wrapper that synthesizes speech with a fairseq TTS checkpoint.

    The constructor loads the acoustic model from
    ``checkpoints/tts_model.pt`` and a vocoder from
    ``checkpoints/vocoder.pt``; :meth:`text_to_speech` turns a string into a
    waveform using fairseq's ``TTSHubInterface`` helpers.
    """

    def __init__(self, device='cpu'):
        """Load model, vocoder and generator and move them to *device*.

        Args:
            device: Torch device (string or ``torch.device``) on which the
                model and vocoder are placed and to which input tensors are
                moved at synthesis time.
        """
        self.device = device
        self.models, self.cfg, self.task = load_model_ensemble_and_task(
            ["checkpoints/tts_model.pt"],
            arg_overrides={"vocoder": "hifigan", "fp16": False},
        )
        # Switch to eval mode so dropout and similar training-only layers are
        # not active while synthesizing.
        self.model = self.models[0].to(device).eval()
        # NOTE(review): ``model_cfg`` receives the fairseq config returned by
        # the checkpoint loader; CodeHiFiGANVocoder presumably expects the
        # vocoder's own config dict -- confirm against the vocoder checkpoint.
        self.vocoder = CodeHiFiGANVocoder(
            "checkpoints/vocoder.pt", model_cfg=self.cfg
        ).to(device)
        TTSHubInterface.update_cfg_with_data_cfg(self.cfg, self.task.data_cfg)
        # get_prediction() needs a speech generator rather than a bare
        # vocoder; build it once here and reuse it for every request.
        self.generator = self.task.build_generator(
            [self.model], self.cfg, vocoder=self.vocoder
        )

    def _resolve_speaker(self, speaker):
        """Return *speaker* as an integer index, or ``None`` if unknown.

        Integers and ``None`` pass through unchanged. Any other value (for
        example a speaker name) is looked up in the task's ``speaker_to_id``
        mapping when the task provides one.
        """
        if speaker is None or isinstance(speaker, int):
            return speaker
        speaker_ids = getattr(self.task, "speaker_to_id", None) or {}
        return speaker_ids.get(speaker)

    def text_to_speech(self, text, speaker='russian'):
        """Synthesize *text* and return the audio.

        Args:
            text: The text to synthesize.
            speaker: Speaker index, or a speaker name to be resolved through
                the task's speaker table. Unresolvable names fall back to
                fairseq's own speaker selection.

        Returns:
            A ``(waveform, rate)`` tuple: the waveform as a NumPy array on
            the CPU and the rate value reported by fairseq.
        """
        # The speaker belongs to the model input; get_prediction() has no
        # ``speaker`` keyword, so passing it there raised TypeError.
        sample = TTSHubInterface.get_model_input(
            self.task, text, speaker=self._resolve_speaker(speaker)
        )
        net_input = sample['net_input']
        net_input['src_tokens'] = net_input['src_tokens'].to(self.device)
        # Keep the speaker index on the same device as the model, if present.
        if sample.get('speaker') is not None:
            sample['speaker'] = sample['speaker'].to(self.device)

        with torch.no_grad():
            # Argument order is (task, model, generator, sample).
            wav, rate = TTSHubInterface.get_prediction(
                self.task, self.model, self.generator, sample
            )
        return wav.cpu().numpy(), rate
|