Spaces:

Futuresony
/

Customer-service

Sleeping

Customer-service / app.py

Create app.py

3da6c4d verified about 2 months ago

1.51 kB

	import gradio as gr
	from asr import transcribe_auto # Import ASR function
	from ttsmms import download, TTS
	from langdetect import detect

	# Fetch each language's TTS model into ./data and build its synthesizer.
	# Both happen at import time, so they run once when the app starts.

	# Swahili ("swh")
	swahili_dir = download("swh", "./data/swahili")
	swahili_tts = TTS(swahili_dir)

	# English ("eng")
	english_dir = download("eng", "./data/english")
	english_tts = TTS(english_dir)

	# Function to handle ASR → TTS
	def asr_to_tts(audio):
	    """Transcribe recorded speech and synthesize the transcription back to audio.

	    Args:
	        audio: Path of the recorded audio file handed over by the Gradio
	            ``Audio`` input (configured with ``type="filepath"``), or
	            ``None`` when the button is pressed without a recording.

	    Returns:
	        A ``(transcribed_text, wav_path)`` tuple. ``wav_path`` is ``None``
	        (and the text is empty) when there was no audio or nothing was
	        transcribed, so there is nothing to synthesize.
	    """
	    # Nothing recorded: return empty outputs instead of crashing in the ASR.
	    if audio is None:
	        return "", None

	    # Step 1: transcribe speech.
	    transcribed_text = transcribe_auto(audio)
	    if not transcribed_text or not transcribed_text.strip():
	        # Empty transcription: language detection and synthesis would both
	        # fail on it, so stop here.
	        return "", None

	    # Step 2: detect the language. langdetect raises on text without usable
	    # features (e.g. digits or punctuation only); fall back to the English
	    # default in that case.
	    from langdetect import LangDetectException

	    try:
	        lang = detect(transcribed_text)
	    except LangDetectException:
	        lang = None

	    # Step 3: synthesize with the matching voice ("sw" -> Swahili, anything
	    # else -> English).
	    # NOTE(review): the output path is fixed, so overlapping requests would
	    # overwrite each other's file — confirm whether the app runs concurrently.
	    wav_path = "./output.wav"
	    tts = swahili_tts if lang == "sw" else english_tts
	    tts.synthesis(transcribed_text, wav_path=wav_path)

	    return transcribed_text, wav_path  # Both the text and the generated speech

	# Gradio interface: a heading, one row holding the microphone input and the
	# two outputs, and a button that runs asr_to_tts on the recording.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        "<h2 style='text-align: center;'>Multilingual Speech-to-Text & Text-to-Speech</h2>"
	    )

	    with gr.Row():
	        # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ expects
	        # `sources=["microphone"]` — confirm against the pinned Gradio version.
	        audio_input = gr.Audio(
	            source="microphone",
	            type="filepath",
	            label="🎤 Speak Here",
	        )
	        text_output = gr.Textbox(
	            label="📝 Transcription",
	            interactive=False,
	        )
	        audio_output = gr.Audio(label="🔊 Generated Speech")

	    submit_button = gr.Button("Transcribe & Speak 🔄")

	    # The handler's (text, wav path) result fills the textbox and the player.
	    submit_button.click(
	        fn=asr_to_tts,
	        inputs=[audio_input],
	        outputs=[text_output, audio_output],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()