# Futuresony's picture
# Rename app.py to app.py4
# f5addc4 verified
import os
import tempfile

import gradio as gr
from langdetect import detect
from langdetect import LangDetectException
from pydub import AudioSegment
from pydub.playback import play
from ttsmms import download, TTS
# Point pydub at the ffmpeg binary so audio conversion works inside
# Hugging Face Spaces.
AudioSegment.converter = "/usr/bin/ffmpeg"

# Fetch both voice models first (Swahili, then English), then load them in
# the same order. The English model covers every word not detected as Swahili.
swahili_dir, english_dir = [
    download(language_code, target_dir)
    for language_code, target_dir in (
        ("swh", "./data/swahili"),
        ("eng", "./data/english"),
    )
]
swahili_tts, english_tts = [
    TTS(model_dir) for model_dir in (swahili_dir, english_dir)
]
# Function to process mixed-language text
def text_to_speech(text):
    """Synthesize mixed Swahili/English text into a single WAV file.

    The text is split on whitespace and the language of each word is
    detected individually. A word is spoken with the Swahili model when the
    detector reports ``"sw"`` and with the English model otherwise. The
    per-word clips are concatenated in input order and exported as one file.

    Args:
        text: The text to speak. May be empty, whitespace-only or ``None``.

    Returns:
        The path of the exported WAV file (``"./output.wav"``), or ``None``
        when ``text`` contains no words.
    """
    words = text.split() if text else []
    if not words:
        # Nothing to synthesize. Without this guard sum([]) yields the int 0,
        # which has no export() method.
        return None

    audio_clips = []
    # Per-word files go into a private scratch directory and are named by
    # position, never by the word itself: user text may contain path
    # separators or repeat, and the directory is removed even when
    # synthesis fails part-way through.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for index, word in enumerate(words):
            try:
                lang = detect(word)  # Detect language of each word
            except LangDetectException:
                # Raised for tokens with no usable features (digits,
                # punctuation). Treat them like any other non-Swahili word.
                lang = None

            wav_path = os.path.join(scratch_dir, f"word_{index}.wav")
            voice = swahili_tts if lang == "sw" else english_tts
            voice.synthesis(word, wav_path=wav_path)
            audio_clips.append(AudioSegment.from_wav(wav_path))

    # Combine all audio clips
    final_audio = sum(audio_clips)
    output_path = "./output.wav"
    # export() hands back the open output file; close it so the handle is
    # not leaked on every request.
    final_audio.export(output_path, format="wav").close()
    return output_path
# Gradio UI: a single text box feeding text_to_speech, with the result
# played back through an audio component. The app is served immediately.
text_input = gr.Textbox(label="Enter Text")
speech_output = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=text_to_speech,
    inputs=text_input,
    outputs=speech_output,
    title="Swahili & English Text-to-Speech",
    description="Type text in Swahili and English, and listen to the mixed-language speech.",
)
demo.launch()