Futuresony's picture
Create app.py
fdda09f verified
raw
history blame
820 Bytes
import gradio as gr
import librosa
import sounddevice as sd
import numpy as np
from ttsmms import download, TTS
# Fetch the Swahili MMS checkpoint (a no-arg change of LANGUAGE_CODE selects
# another language) and build the synthesizer used by the request handler.
LANGUAGE_CODE = "swh"
MODEL_CACHE_DIR = "./data"

dir_path = download(LANGUAGE_CODE, MODEL_CACHE_DIR)
tts = TTS(dir_path)
def text_to_speech(text):
    """Synthesize speech for ``text`` and return it for a ``gr.Audio`` output.

    Args:
        text: The text passed to the module-level ``tts`` model.

    Returns:
        A ``(sample_rate, audio)`` tuple. Gradio's ``Audio`` output component
        expects the sample rate first and the waveform second; returning them
        in the opposite order makes it treat the waveform as the rate.
    """
    import warnings

    result = tts.synthesis(text)
    audio = result["x"]
    sample_rate = result["sampling_rate"]

    # Playback on the machine running the app is best-effort only: a headless
    # host has no output device, and a PortAudio failure here must not stop
    # the audio from being returned to the browser.
    try:
        sd.play(audio, samplerate=sample_rate)
        sd.wait()
    except sd.PortAudioError as err:
        warnings.warn(f"Local audio playback skipped: {err}", RuntimeWarning)

    return sample_rate, audio
# Web front end: one text box in, one audio player out, served immediately
# when this module is executed.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Text(label="Enter Text"),
    outputs=gr.Audio(label="Generated Speech"),
    title="Swahili Text-to-Speech",
    description="Type text and listen to the generated Swahili speech.",
)
demo.launch()