# Page residue from the hosting site's file viewer (not part of the program):
#   rutsam's picture
#   deploy tts
#   5e334c0
#   raw
#   history blame
#   1.67 kB
import tempfile
from typing import Optional
import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Identifier of the model that generate_audio passes to manager.download_model.
model_name = "DigitalUmuganda/Kinyarwanda_YourTTS"

# Longest input (in characters) that generate_audio synthesizes; anything
# beyond this is cut off before synthesis.
MAX_TXT_LEN = 100

# Single ModelManager instance used by generate_audio for every download.
manager = ModelManager()
def generate_audio(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Text longer than ``MAX_TXT_LEN`` characters is truncated to that length
    and a notice is printed. The model named by the module-level
    ``model_name`` is obtained through ``manager.download_model``; if the
    returned model entry names a default vocoder, that is obtained the same
    way.

    Args:
        text: The sentence to synthesize.

    Returns:
        The filesystem path of a temporary ``.wav`` file containing the
        audio. The file is created with ``delete=False``, so it is left on
        disk for the caller to consume.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Reject unusable input up front, before any model download or load.
    if not text or not text.strip():
        raise ValueError("Input text is empty; nothing to synthesize.")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    model_path, config_path, model_item = manager.download_model(model_name)

    # The vocoder is optional: only fetch it when the model entry names one.
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # NOTE(review): the two positional ``None`` arguments are presumably the
    # optional speaker/language files -- confirm against the Synthesizer
    # signature of the installed TTS version.
    # A constructor call either returns an instance or raises, so no
    # ``is None`` check is needed afterwards.
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )
    wavs = synthesizer.tts(text)

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
# Web UI: one text box in, one audio player out, backed by generate_audio.
# NOTE(review): the description string ends in "build with " and looks
# truncated; flagging_options are supplied although allow_flagging is False.
iface = gr.Interface(
    generate_audio,
    [
        gr.inputs.Textbox(
            default="This sentence has been generated by a speech synthesis system.",
            label="Input Text",
        ),
    ],
    gr.outputs.Audio(label="Output"),
    # Page text.
    title="Kinyarwanda tts Demo",
    description="Kinyarwanda tts build with ",
    # Presentation and interaction mode.
    layout="vertical",
    live=False,
    # Flagging configuration.
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
)

# Start the server without requesting a public share link.
iface.launch(share=False)