File size: 2,319 Bytes
4d9475f
4712ee3
 
4d9475f
4712ee3
4d9475f
 
 
4712ee3
 
 
4d9475f
 
4712ee3
 
 
 
 
 
4d9475f
4712ee3
 
 
 
 
 
 
 
 
 
4d9475f
 
4712ee3
 
 
4d9475f
4712ee3
4d9475f
 
4712ee3
4d9475f
 
 
 
4712ee3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import tempfile
import gradio as gr
from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download

# Model registry and app-wide settings.
# Each MODEL_INFO row is laid out as:
#   [model name, checkpoint file, config file, Hub repository id]
MODEL_INFO = [
    [
        "vits-espeak-57000",
        "checkpoint_57000.pth",
        "config.json",
        "mhrahmani/persian-tts-vits-0",
    ],
    # Register further models here using the same four-field layout.
]

# Model names in registry order (first field of every row).
MODEL_NAMES = [name for name, *_ in MODEL_INFO]

# Longest input, in characters, that is passed on for synthesis.
MAX_TXT_LEN = 400

# Hub access token read from the environment; None when the variable is unset.
# Point this at a different variable if your token lives elsewhere.
TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN")

# Download models
# MODEL_PATHS maps each model name to the local (checkpoint, config) paths that
# hf_hub_download returns. The Hub client stores files in its own layout under
# cache_dir, so the returned path must be used to locate a file; a hand-built
# f"{model_name}/{model_file}" does not point at the downloaded file.
MODEL_PATHS = {}
for model_name, model_file, config_file, repo_name in MODEL_INFO:
    os.makedirs(model_name, exist_ok=True)
    print(f"|> Downloading: {model_name}")

    # Use hf_hub_download to download models from Hugging Face repositories
    MODEL_PATHS[model_name] = (
        hf_hub_download(repo_id=repo_name, filename=model_file, cache_dir=model_name, use_auth_token=TOKEN),
        hf_hub_download(repo_id=repo_name, filename=config_file, cache_dir=model_name, use_auth_token=TOKEN),
    )

# Synthesizer instances keyed by model name, created on first use so the
# checkpoint is not reloaded from disk on every request.
_SYNTHESIZERS = {}


def synthesize(text: str, model_name: str) -> str:
    """Synthesize speech using the selected model.

    Args:
        text: Text to speak. Anything beyond MAX_TXT_LEN characters is
            dropped (a notice is printed when that happens).
        model_name: Name of a model registered in MODEL_INFO.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        NameError: If ``model_name`` is not one of the downloaded models.
    """
    # Resolve the files for the *requested* model instead of relying on the
    # loop variables left over from the download loop (those always refer to
    # the last MODEL_INFO entry).
    if model_name not in MODEL_PATHS:
        raise NameError("Model not found")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")

    synthesizer = _SYNTHESIZERS.get(model_name)
    if synthesizer is None:
        checkpoint_path, config_path = MODEL_PATHS[model_name]
        synthesizer = Synthesizer(checkpoint_path, config_path)
        _SYNTHESIZERS[model_name] = synthesizer

    wavs = synthesizer.tts(text)

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name

# Build the Gradio UI: a text box and a model picker feed `synthesize`,
# whose returned file path is played back by an audio component.
_SAMPLE_TEXT = "زین همرهان سست عناصر، دلم گرفت."

_text_input = gr.Textbox(label="Enter Text to Synthesize:", value=_SAMPLE_TEXT)
_model_picker = gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0])
_audio_output = gr.Audio(label="Output", type='filepath')

iface = gr.Interface(
    fn=synthesize,
    inputs=[_text_input, _model_picker],
    outputs=_audio_output,
    # One ready-made example: the sample sentence with the first model.
    examples=[[_SAMPLE_TEXT, MODEL_NAMES[0]]],
    title='persian tts playground',
    description="Persian text to speech model demo",
    article="",
    live=False,
)

# Start the web server without requesting a public share link.
iface.launch(share=False)