🐸💬 CoquiTTS Demo Proxecto Nós

import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


MODELS = {}
SPEAKERS = {}
MAX_TXT_LEN = 100


manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# filter out multi-speaker models and slow wavegrad vocoders
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

EN = [el for el in MODEL_NAMES if "/en/" in el]
OTHER = [el for el in MODEL_NAMES if "/en/" not in el]
EN[0], EN[5] = EN[5], EN[0]
MODEL_NAMES = EN + OTHER

# reorder models
print(MODEL_NAMES)


def tts(text: str, model_name: str):
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)
    # download model
    model_path, config_path, model_item = manager.download_model(model_name)
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    # download vocoder
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    # init synthesizer
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )
    # synthesize
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text, None)
    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


title = """<h1 align="center">🐸💬 CoquiTTS Demo Proxecto Nós </h1>"""

with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                    """
                    ## <img src="https://huggingface.co/spaces/proxectonos/README/resolve/main/title-card.png" width="100%" style="border-radius: 0.75rem;">
                    """
            )
        with gr.Column():
            with gr.Row():            
                gr.Markdown(
                """
                <br/>
                
                💻 Este space mostra algúns dos modelos TTS desenvolvidos polo **[Proxecto Nós](https://huggingface.co/proxectonos)**.

                <br/>
                """
                )
            with gr.Row():            
                input_text = gr.Textbox(
                    label="Input Text",
                    value="This sentence has been generated by a speech synthesis system.",
                )
            with gr.Row():            
                model_select = gr.Dropdown(
                    label="Pick Model: tts_models/<language>/<dataset>/<model_name>",
                    choices=MODEL_NAMES,
                    value="tts_models/en/jenny/jenny"
                )
            with gr.Row():            
                tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

            with gr.Row():
                output_audio = gr.Audio(label="Output", type="filepath")

    tts_button.click(
        tts,
        inputs=[
            input_text,
            model_select,
        ],
        outputs=[output_audio],
        concurrency_limit=16,
    )

demo.launch(debug=True)