Spaces:

Futuresony
/

Automatic-speech-recognition

Sleeping

File size: 820 Bytes

fdda09f

import gradio as gr
import librosa
import sounddevice as sd
import numpy as np
from ttsmms import download, TTS

# Language code and local directory handed to the ttsmms downloader.
LANGUAGE_CODE = "swh"  # Swahili; change to another language code if needed
MODEL_DIR = "./data"

# Download the model files, then build the synthesizer from the returned path.
dir_path = download(LANGUAGE_CODE, MODEL_DIR)
tts = TTS(dir_path)

# Function to generate speech from text
def text_to_speech(text):
    """Synthesize speech for ``text`` and return it for the Gradio audio output.

    The generated audio is also played on the local default output device
    when one is available; playback failure does not affect the return value.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is treated as "nothing to say".

    Returns:
        ``None`` when there is nothing to synthesize; otherwise a
        ``(sample_rate, audio)`` tuple, which is the order ``gr.Audio``
        expects for in-memory audio.
    """
    import logging

    # Nothing to synthesize: skip the model entirely and show no audio.
    if not text or not text.strip():
        return None

    result = tts.synthesis(text)
    audio = result["x"]
    sample_rate = result["sampling_rate"]

    # Local playback is best-effort: machines without an audio output device
    # (e.g. hosted servers) raise PortAudioError, and that must not prevent
    # the synthesized audio from being returned to the UI.
    try:
        sd.play(audio, samplerate=sample_rate)
        sd.wait()
    except sd.PortAudioError as exc:
        logging.getLogger(__name__).warning(
            "Local audio playback unavailable: %s", exc
        )

    # gr.Audio takes (sample_rate, data), not (data, sample_rate).
    return sample_rate, audio

# Gradio UI for TTS: a text box feeding text_to_speech, an audio player for
# its result.
text_input = gr.Text(label="Enter Text")
speech_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=text_to_speech,
    inputs=text_input,
    outputs=speech_output,
    title="Swahili Text-to-Speech",
    description="Type text and listen to the generated Swahili speech.",
)

# Start the web server as soon as the script runs.
demo.launch()