File size: 1,927 Bytes
c09ad18
 
e954652
 
c09ad18
 
b5daf8c
c09ad18
 
 
e954652
1f705ff
3d7c8ba
 
1f705ff
bee8f8a
1f705ff
5b90064
c09ad18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f705ff
 
 
c09ad18
1f705ff
c09ad18
5b90064
c09ad18
 
 
 
 
 
 
 
 
e954652
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import torch
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import tempfile
from typing import Optional

# 🛠️ Use the Model Manager to download each TTS model and its default vocoder
# Registry of loaded synthesizers, keyed by the short names in MODEL_NAMES.
# A model that fails to download or load is simply absent from this dict.
MODELS = {}
manager = ModelManager()

MODEL_NAMES = [
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
]

for MODEL_NAME in MODEL_NAMES:
    print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")

    try:
        model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")

        # Treat a missing "default_vocoder" key the same as an explicit None
        # (no vocoder) instead of letting a KeyError skip the whole model.
        vocoder_name: Optional[str] = model_item.get("default_vocoder")
        vocoder_path = None
        vocoder_config_path = None

        if vocoder_name is not None:
            vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

        # 🧙‍♂️ Build the synthesizer. Arguments are passed by keyword so the
        # vocoder paths cannot be bound to the wrong parameter if the
        # positional order of Synthesizer.__init__ differs between TTS
        # releases (NOTE(review): confirm against the installed TTS version).
        MODELS[MODEL_NAME] = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=config_path,
            tts_speakers_file=None,
            vocoder_checkpoint=vocoder_path,
            vocoder_config=vocoder_config_path,
            use_cuda=False,  # Make sure you're not forcing CUDA unless needed
        )

    except Exception as e:
        # Best effort: report the failure and move on to the next model so
        # one broken download does not prevent the others from loading.
        print(f"😬 Failed to load model {MODEL_NAME}: {e}")

# 🗣️ Text to Speech (because speaking is fun, but robots do it better)
def tts(text: str, model_name: str) -> Optional[str]:
    """Synthesize *text* with a loaded model and write it to a temporary WAV file.

    Args:
        text: The text to synthesize.
        model_name: Key into the module-level ``MODELS`` registry.

    Returns:
        The path of the generated ``.wav`` file, or ``None`` if synthesis or
        saving failed (the error is printed). The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        NameError: If ``model_name`` is not present in ``MODELS``.
    """
    print(text, model_name)
    synthesizer = MODELS.get(model_name)

    if synthesizer is None:
        raise NameError("Model not found, check if it's loaded properly!")

    # Remember the temp file path so a failed run can clean up after itself.
    wav_path = None
    try:
        wavs = synthesizer.tts(text)

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wav_path = fp.name
            synthesizer.save_wav(wavs, fp)
        return wav_path
    except Exception as e:
        print(f"😬 Error generating speech: {str(e)}")
        # The temp file is not auto-deleted; without this, a failure while
        # saving would leave an orphaned (possibly partial) file behind.
        # By this point the `with` block has already closed the handle.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass
        return None