|
import gradio as gr |
|
import librosa |
|
import sounddevice as sd |
|
import numpy as np |
|
from ttsmms import download, TTS |
|
|
|
|
|
# Obtain the "swh" voice model (the UI in this file presents it as Swahili),
# using ./data as the storage directory, then build the synthesizer from the
# path that download() returns. Both steps run once, at module load.
dir_path = download("swh", "./data")
tts = TTS(dir_path)
|
|
|
|
|
def text_to_speech(text):
    """Synthesize speech for *text*, play it locally, and return it for Gradio.

    Args:
        text: The text to synthesize.

    Returns:
        ``(sample_rate, audio)`` -- the order ``gr.Audio`` expects for
        in-memory audio output -- or ``None`` when *text* is empty or
        whitespace-only, which leaves the audio component empty.
    """
    if not text or not text.strip():
        # Nothing to say: skip the model call and the blocking playback.
        return None

    result = tts.synthesis(text)
    audio = result["x"]
    sample_rate = result["sampling_rate"]

    # Plays through the audio device of the machine running this script and
    # blocks until playback has finished.
    sd.play(audio, samplerate=sample_rate)
    sd.wait()

    # gr.Audio wants the sample rate first, then the samples.
    return sample_rate, audio
|
|
|
|
|
# Web UI: a single text box feeding text_to_speech, with the result shown in
# an audio player.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Text(label="Enter Text"),
    outputs=gr.Audio(label="Generated Speech"),
    title="Swahili Text-to-Speech",
    description="Type text and listen to the generated Swahili speech.",
)

# Start the Gradio server. This is a top-level statement, so it executes
# whenever the module is run or imported.
demo.launch()
|
|