# usuktts / app.py
# englissi's picture
# Update app.py
# 20d2b2c verified
import tempfile
from io import BytesIO

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
# Step 1: Define a function to generate and merge TTS audio for multiple languages
def multilingual_tts(korean_text, british_english_text, american_english_text):
    """Synthesize the given texts with gTTS and merge them into one MP3 file.

    Clips are concatenated in a fixed order (Korean, British English,
    American English) and every clip is followed by 500 ms of silence.
    Inputs that are ``None``, empty, or whitespace-only are skipped.

    Args:
        korean_text: Text synthesized with ``lang="ko"``.
        british_english_text: Text synthesized with ``lang="en"`` through
            the ``co.uk`` top-level domain.
        american_english_text: Text synthesized with ``lang="en"`` through
            the ``com`` top-level domain.

    Returns:
        Path of a newly created temporary ``.mp3`` file containing the
        merged audio, or ``None`` when none of the inputs contained text.
    """
    # (gTTS language code, TLD, text) in playback order. Both English
    # variants use lang="en"; they differ only in the TLD handed to gTTS.
    jobs = (
        ("ko", "com", korean_text),
        ("en", "co.uk", british_english_text),
        ("en", "com", american_english_text),
    )
    # Filter out missing/blank inputs up front; `text and ...` also guards
    # against None, on which .strip() would raise.
    jobs = [job for job in jobs if job[2] and job[2].strip()]
    if not jobs:
        # Nothing to speak: report "no audio" instead of exporting a
        # zero-length MP3.
        return None

    pause = AudioSegment.silent(duration=500)  # appended after every clip
    combined_audio = AudioSegment.silent(duration=0)  # empty start segment
    for lang, tld, text in jobs:
        mp3_buffer = BytesIO()
        gTTS(text, lang=lang, tld=tld).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)  # rewind so the decoder reads from the start
        combined_audio += AudioSegment.from_file(mp3_buffer, format="mp3") + pause

    # Export to a unique temporary file rather than one fixed filename, so
    # overlapping calls cannot overwrite each other's result. delete=False
    # keeps the file on disk after the handle is closed, because the caller
    # still has to read it; the file is not cleaned up here.
    with tempfile.NamedTemporaryFile(
        prefix="combined_output_", suffix=".mp3", delete=False
    ) as out_file:
        combined_audio.export(out_file, format="mp3")
    return out_file.name
# Step 2: Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File")

    # One textbox per language, laid out side by side.
    with gr.Row():
        korean_input = gr.Textbox(
            label="Enter Korean Text:",
            # Korean for "hello"; restored from a mis-decoded (mojibake) literal.
            placeholder="안녕하세요",
        )
        british_english_input = gr.Textbox(
            label="Enter British English Text:",
            placeholder="Hello (British)",
        )
        american_english_input = gr.Textbox(
            label="Enter American English Text:",
            placeholder="Hello (American)",
        )

    # type="filepath": the component exchanges audio as a path on disk,
    # which matches what multilingual_tts returns.
    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # Textbox values are passed positionally in the order of the
    # multilingual_tts parameters: Korean, British English, American English.
    generate_button.click(
        multilingual_tts,
        inputs=[korean_input, british_english_input, american_english_input],
        outputs=output_audio,
    )

# Run the app
if __name__ == "__main__":
    demo.launch()