File size: 2,055 Bytes
33a6897
20d2b2c
 
 
33a6897
20d2b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33a6897
20d2b2c
33a6897
20d2b2c
 
33a6897
20d2b2c
 
 
 
 
 
 
33a6897
20d2b2c
 
 
33a6897
 
20d2b2c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
from io import BytesIO

# Step 1: Define a function to generate and merge TTS audio for multiple languages
def multilingual_tts(korean_text, british_english_text, american_english_text):
    """Synthesize the given texts with gTTS and merge them into one MP3 file.

    The clips are concatenated in the order Korean, British English,
    American English, and each clip is followed by 500 ms of silence.
    Fields that are ``None``, empty, or whitespace-only are skipped.

    Args:
        korean_text: Text to speak in Korean (may be ``None`` or blank).
        british_english_text: Text to speak with the ``co.uk`` gTTS voice
            (may be ``None`` or blank).
        american_english_text: Text to speak with the ``com`` gTTS voice
            (may be ``None`` or blank).

    Returns:
        The path of the exported MP3 file (``"combined_output.mp3"``), or
        ``None`` when no field contains any text to synthesize.
    """
    # (gTTS language code, Google Translate top-level domain, text), in
    # playback order. Both English variants use lang="en"; only the TLD
    # differs between them.
    candidates = (
        ("ko", "com", korean_text),
        ("en", "co.uk", british_english_text),
        ("en", "com", american_english_text),
    )

    # Drop missing/blank fields up front so that an entirely empty form never
    # reaches the TTS service or the MP3 encoder.
    pending = [
        (lang, tld, text)
        for lang, tld, text in candidates
        if text and text.strip()
    ]
    if not pending:
        return None

    pause = AudioSegment.silent(duration=500)  # Gap appended after each clip
    combined_audio = AudioSegment.silent(duration=0)  # Empty audio to start

    for lang, tld, text in pending:
        # gTTS produces MP3 data; keep it in memory and decode it with pydub.
        mp3_buffer = BytesIO()
        gTTS(text, lang=lang, tld=tld).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        combined_audio += AudioSegment.from_file(mp3_buffer, format="mp3") + pause

    # Save combined audio to a file. The handle is opened here so that it is
    # closed deterministically once the export has finished.
    # NOTE(review): the file name is fixed, so overlapping requests would
    # overwrite each other's output -- confirm this handler is never run
    # concurrently, or switch to a per-request temporary file.
    output_file = "combined_output.mp3"
    with open(output_file, "wb") as out_fp:
        combined_audio.export(out_fp, format="mp3")

    return output_file

# Step 2: Create Gradio interface
# Layout: a heading, one row with the three language text boxes, then the
# audio player and the button that triggers synthesis.
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File")

    with gr.Row():
        # The Korean placeholder had been stored as mojibake (its UTF-8 bytes
        # decoded with a legacy single-byte code page); restored to the
        # intended greeting.
        korean_input = gr.Textbox(label="Enter Korean Text:", placeholder="안녕하세요")
        british_english_input = gr.Textbox(label="Enter British English Text:", placeholder="Hello (British)")
        american_english_input = gr.Textbox(label="Enter American English Text:", placeholder="Hello (American)")

    # type="filepath" matches multilingual_tts, which returns a path to the
    # exported MP3 file.
    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # The inputs are passed positionally, in this order, to multilingual_tts.
    generate_button.click(
        multilingual_tts,
        inputs=[korean_input, british_english_input, american_english_input],
        outputs=output_audio,
    )

# Run the app
if __name__ == "__main__":
    demo.launch()