rutsam's picture
add tts api import
84f4ed8
raw
history blame
2.42 kB
import os
import subprocess
import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.api import TTS
# Upper bound on the number of input characters that are synthesized;
# longer input is truncated to this length before synthesis.
MAX_TXT_LEN = 100

# Fetch the model files at start-up.
# - Commands are passed as argument lists, so no shell is involved.
# - The clone is skipped when a checkout is already present: `git clone`
#   refuses a non-empty destination, which previously made every restart
#   with an existing checkout fail with CalledProcessError.
subprocess.check_output(["git", "lfs", "install"])
if not os.path.isdir(os.path.join("Kinyarwanda_YourTTS", ".git")):
    subprocess.check_output(
        ["git", "clone", "https://huggingface.co/DigitalUmuganda/Kinyarwanda_YourTTS"]
    )
# Lazily created, process-wide model instance (see _get_tts).
_tts_model = None


def _get_tts():
    """Return the shared TTS model, loading it from the local checkout on first use."""
    global _tts_model
    if _tts_model is None:
        # Loading the checkpoints is expensive; do it once instead of per request.
        _tts_model = TTS(
            model_path="Kinyarwanda_YourTTS/model.pth",
            config_path="Kinyarwanda_YourTTS/config.json",
            tts_speakers_file="Kinyarwanda_YourTTS/speakers.pth",
            encoder_checkpoint="Kinyarwanda_YourTTS/SE_checkpoint.pth.tar",
            encoder_config="Kinyarwanda_YourTTS/config_se.json",
        )
    return _tts_model


def _to_int16_pcm(wav):
    """Peak-normalize the samples in *wav* and convert them to 16-bit PCM."""
    samples = np.asarray(wav, dtype=np.float32)
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    if peak > 0:
        # Guarded so that empty or all-zero audio does not divide by zero.
        samples = samples / peak
    return (samples * 32767).astype(np.int16)


def generate_audio(text):
    """Synthesize speech for *text* and return it for a numpy audio output.

    Input longer than ``MAX_TXT_LEN`` characters is truncated, and a notice
    is printed. The voice is conditioned on the reference recording
    ``Kinyarwanda_YourTTS/conditioning_audio.wav``.

    Args:
        text: The text to synthesize.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is a
        ``numpy.int16`` array. This is the pair a Gradio audio output of
        ``type="numpy"`` expects; returning the bare sample sequence does
        not satisfy that contract.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    tts = _get_tts()
    wav = tts.tts(text, speaker_wav="Kinyarwanda_YourTTS/conditioning_audio.wav")

    # NOTE(review): assumes `tts.tts` yields a flat sequence of float samples and
    # that the model's output rate is exposed as `synthesizer.output_sample_rate`
    # (Coqui TTS API) - confirm against the installed TTS version.
    return tts.synthesizer.output_sample_rate, _to_int16_pcm(wav)
# Input widget: one text box pre-filled with a sample sentence.
text_input = gr.inputs.Textbox(
    label="Input Text",
    default="This sentence has been generated by a speech synthesis system.",
)

# Output widget: an audio player fed with the numpy result of generate_audio.
audio_output = gr.outputs.Audio(type="numpy", label="Output")

# Presentation and flagging settings for the demo page.
interface_options = {
    "title": "Kinyarwanda tts Demo",
    "description": "Kinyarwanda tts build with ",
    "allow_flagging": False,
    "flagging_options": ['error', 'bad-quality', 'wrong-pronounciation'],
    "layout": "vertical",
    "live": False,
}

iface = gr.Interface(
    fn=generate_audio,
    inputs=[text_input],
    outputs=audio_output,
    **interface_options,
)

# Start the web app without creating a public share link.
iface.launch(share=False)