File size: 3,469 Bytes
9e41260
 
4eca143
9e41260
 
909d366
9e41260
909d366
9e41260
 
 
4eca143
909d366
4eca143
9e41260
 
 
 
 
 
 
 
 
 
 
 
 
4eca143
909d366
 
 
 
 
 
c54507d
909d366
c54507d
909d366
 
 
 
e44bcbb
f5591d6
909d366
 
f5591d6
c54507d
 
909d366
c54507d
4eca143
909d366
 
 
9e41260
4eca143
909d366
9e41260
 
 
 
 
 
 
 
 
 
909d366
9e41260
 
 
 
 
 
 
 
 
909d366
9e41260
 
 
 
 
 
909d366
9e41260
 
ab9165c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from moviepy.editor import concatenate_videoclips, AudioFileClip, ImageClip, VideoFileClip

# Fetch the catalogue of Edge TTS voices
async def get_voices():
    """Return a dict mapping a human-readable label to each voice's ShortName.

    Labels look like "en-US-AriaNeural - en-US (Female)"; the values are the
    bare ShortName identifiers that edge-tts expects.
    """
    labels = {}
    for voice in await edge_tts.list_voices():
        label = f"{voice['ShortName']} - {voice['Locale']} ({voice['Gender']})"
        labels[label] = voice['ShortName']
    return labels

# Text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to an MP3 file via Microsoft Edge TTS.

    Args:
        text: Text to speak; must be non-blank.
        voice: Dropdown label of the form "ShortName - Locale (Gender)".
        rate: Speaking-rate adjustment in percent (may arrive as a float
            from a Gradio slider).
        pitch: Pitch adjustment in Hz (may arrive as a float).

    Returns:
        (path_to_mp3, None) on success, or (None, gr.Warning) when input
        validation fails.
    """
    if not text.strip():
        return None, gr.Warning("Please enter the text to convert.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    # Only the ShortName before " - " is a valid edge-tts voice identifier.
    voice_short_name = voice.split(" - ")[0]
    # BUG FIX: Gradio sliders can deliver floats, and the "+d" format code
    # raises ValueError on non-ints — coerce to int before formatting the
    # "+N%" / "+NHz" strings edge-tts requires.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
    # delete=False so the file outlives the context manager; Gradio serves
    # (and is responsible for cleaning up) the temp file afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    return tmp_path, None

# Text-to-video function
def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Render *text* as speech over a background image or video.

    Args:
        text, voice, rate, pitch: Forwarded to text_to_speech().
        bg_media: Filepath to a background image or .mp4 video, or None when
            nothing was uploaded.
        video_width, video_height: Output dimensions in pixels.

    Returns:
        (path_to_mp4, None) on success, or (None, gr.Warning) on bad input.
    """
    # BUG FIX: gr.File yields None when nothing is uploaded; the original
    # code crashed with AttributeError on bg_media.endswith(...).
    if not bg_media:
        return None, gr.Warning("Please upload a background image or video.")

    # Generate audio from text
    audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
    if warning:
        return None, warning

    audio_clip = AudioFileClip(audio)

    # Create background video or image; case-insensitive so ".MP4" works too.
    if bg_media.lower().endswith('.mp4'):
        bg_clip = VideoFileClip(bg_media).resize(newsize=(video_width, video_height)).set_duration(audio_clip.duration)
    else:
        bg_clip = ImageClip(bg_media).set_duration(audio_clip.duration).resize(newsize=(video_width, video_height))

    # Set audio for the background
    final_video = bg_clip.set_audio(audio_clip)

    final_video_path = os.path.join(tempfile.gettempdir(), "output_video.mp4")
    final_video.write_videofile(final_video_path, fps=24, codec="libx264")

    # BUG FIX: release the file/decoder handles moviepy keeps open; without
    # this each render leaks an ffmpeg reader.
    audio_clip.close()
    bg_clip.close()

    return final_video_path, None

# Adapter wired into the Gradio Interface
def tts_interface(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Bridge the Gradio form to text_to_video().

    Returns a 3-tuple matching the interface outputs: the audio slot (always
    None — audio is embedded in the video), the video path, and any warning.
    """
    video_path, warning = text_to_video(
        text, voice, rate, pitch, bg_media, video_width, video_height
    )
    return None, video_path, warning

# Create Gradio app
async def create_demo():
    """Build and return the Gradio Interface.

    Async because the voice catalogue is fetched with edge-tts before the
    UI can be constructed.
    """
    voices = await get_voices()

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # Empty first choice forces the user to make an explicit selection.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
            gr.File(label="Upload Background Image or Video", type="filepath"),
            gr.Slider(minimum=640, maximum=1920, value=1080, label="Video Width", step=10),
            gr.Slider(minimum=480, maximum=1080, value=720, label="Video Height", step=10),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Generated Video"),
            gr.Markdown(label="Warning", visible=False)
        ],
        title="Edge TTS Text to Speech and Video",
        description="Convert text to speech and video using Microsoft Edge TTS. Upload an image or video for the background.",
        analytics_enabled=False,
        # BUG FIX: allow_flagging expects "never"/"auto"/"manual"; the boolean
        # False is not a valid value in current Gradio releases.
        allow_flagging="never",
    )

    return demo

# Script entry point
if __name__ == "__main__":
    # Building the demo is async (voice listing awaits edge-tts); run it to
    # completion, then serve the resulting interface.
    app = asyncio.run(create_demo())
    app.launch()