Spaces:
Sleeping
Sleeping
File size: 3,469 Bytes
9e41260 4eca143 9e41260 909d366 9e41260 909d366 9e41260 4eca143 909d366 4eca143 9e41260 4eca143 909d366 c54507d 909d366 c54507d 909d366 e44bcbb f5591d6 909d366 f5591d6 c54507d 909d366 c54507d 4eca143 909d366 9e41260 4eca143 909d366 9e41260 909d366 9e41260 909d366 9e41260 909d366 9e41260 ab9165c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from moviepy.editor import concatenate_videoclips, AudioFileClip, ImageClip, VideoFileClip
# Get all available voices
async def get_voices():
    """Fetch every Edge TTS voice, keyed by a human-readable label.

    Returns a dict mapping "ShortName - Locale (Gender)" labels to the
    voice ShortName that edge_tts.Communicate expects.
    """
    available = await edge_tts.list_voices()
    labelled = {}
    for entry in available:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = entry['ShortName']
    return labelled
# Text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize `text` to an MP3 file with Edge TTS.

    Args:
        text: Text to speak; must be non-blank.
        voice: Dropdown label of the form "ShortName - Locale (Gender)".
        rate: Speaking-rate adjustment in percent (may arrive as float).
        pitch: Pitch adjustment in Hz (may arrive as float).

    Returns:
        (audio_path, warning) — on success `warning` is None; on invalid
        input `audio_path` is None and `warning` is a gr.Warning.
    """
    if not text.strip():
        return None, gr.Warning("Please enter the text to convert.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    # The dropdown label starts with the ShortName edge-tts needs.
    voice_short_name = voice.split(" - ")[0]
    # Gradio sliders deliver floats; the `:+d` format spec requires an
    # int and raises ValueError otherwise — coerce first.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
    # delete=False so the path outlives the context manager; the caller
    # is responsible for cleaning the file up.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    return tmp_path, None
# Text-to-video function
def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Render narrated video: TTS audio over a background image or video.

    Args:
        text, voice, rate, pitch: forwarded to text_to_speech.
        bg_media: filepath of the uploaded background (image or video),
            or None if nothing was uploaded.
        video_width, video_height: output frame size in pixels.

    Returns:
        (video_path, warning) — mirrors text_to_speech's convention.
    """
    # gr.File yields None when nothing is uploaded; .endswith on None
    # would crash, so guard explicitly.
    if not bg_media:
        return None, gr.Warning("Please select a voice.") if False else gr.Warning("Please enter the text to convert.") if False else gr.Warning("Please upload a background image or video.")

    # Generate the narration audio first.
    audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
    if warning:
        return None, warning

    audio_clip = AudioFileClip(audio)
    try:
        # Recognize common video containers (case-insensitively); anything
        # else is treated as a still image.
        if bg_media.lower().endswith(('.mp4', '.mov', '.avi', '.mkv', '.webm')):
            bg_clip = VideoFileClip(bg_media).resize(newsize=(video_width, video_height)).set_duration(audio_clip.duration)
        else:
            bg_clip = ImageClip(bg_media).set_duration(audio_clip.duration).resize(newsize=(video_width, video_height))

        # Mux the narration onto the background and encode.
        final_video = bg_clip.set_audio(audio_clip)
        final_video_path = os.path.join(tempfile.gettempdir(), "output_video.mp4")
        final_video.write_videofile(final_video_path, fps=24, codec="libx264")
    finally:
        # Close the reader and remove the delete=False temp MP3 so
        # repeated runs don't leak file handles or disk space.
        audio_clip.close()
        try:
            os.remove(audio)
        except OSError:
            pass
    return final_video_path, None
# Gradio interface function
def tts_interface(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Gradio callback: delegate to text_to_video and fan out the outputs."""
    video_path, warning = text_to_video(
        text, voice, rate, pitch, bg_media, video_width, video_height
    )
    # The audio output slot is left empty; video and warning fill the rest.
    return None, video_path, warning
# Create Gradio app
async def create_demo():
    """Build the Gradio Interface, populating the voice dropdown first.

    Async because the voice list must be fetched from Edge TTS before
    the dropdown choices can be constructed.
    """
    voices = await get_voices()
    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # Leading "" entry lets the user start with no voice selected.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
            gr.File(label="Upload Background Image or Video", type="filepath"),
            gr.Slider(minimum=640, maximum=1920, value=1080, label="Video Width", step=10),
            gr.Slider(minimum=480, maximum=1080, value=720, label="Video Height", step=10),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Generated Video"),
            gr.Markdown(label="Warning", visible=False),
        ],
        title="Edge TTS Text to Speech and Video",
        description="Convert text to speech and video using Microsoft Edge TTS. Upload an image or video for the background.",
        analytics_enabled=False,
        # Gradio expects "never"/"manual"/"auto" here, not a bool.
        allow_flagging="never",
    )
    return demo
# Run the application
if __name__ == "__main__":
    # Building the UI is async (voice listing), so run it to completion
    # before starting the Gradio server.
    app = asyncio.run(create_demo())
    app.launch()