Spaces:
Runtime error
Runtime error
File size: 3,646 Bytes
9e41260 4eca143 9e41260 909d366 9e41260 909d366 9e41260 4eca143 909d366 4eca143 9e41260 4eca143 909d366 c54507d 909d366 0692426 909d366 e44bcbb f5591d6 909d366 f5591d6 c54507d 909d366 c54507d 4eca143 909d366 9e41260 4eca143 909d366 9e41260 909d366 9e41260 909d366 9e41260 909d366 9e41260 0692426 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from moviepy.editor import concatenate_videoclips, AudioFileClip, ImageClip, VideoFileClip
# Get all available voices
async def get_voices():
    """Return the Edge TTS voices as a ``{display label: ShortName}`` mapping.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and is
    built from the entries returned by ``edge_tts.list_voices()``.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
# Text-to-speech function
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to a temporary MP3 file with Edge TTS.

    Args:
        text: The text to speak.
        voice: Dropdown label of the form ``"<ShortName> - <Locale> (<Gender>)"``;
            only the part before the first ``" - "`` is passed to Edge TTS.
        rate: Speaking-rate adjustment in percent; sent as e.g. ``"+10%"``.
        pitch: Pitch adjustment in hertz; sent as e.g. ``"-5Hz"``.

    Returns:
        ``(path, None)`` on success, where *path* is an ``.mp3`` file created
        with ``delete=False`` (the caller is responsible for removing it), or
        ``(None, message)`` when the text is blank or no voice was selected.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    # gr.Warning() only pops a toast in the UI; Gradio documents no return
    # value for it, so the message text itself is returned as the second
    # element to give callers something truthy to test.
    if not text or not text.strip():
        message = "Please enter the text to convert."
        gr.Warning(message)
        return None, message
    if not voice:
        message = "Please select a voice."
        gr.Warning(message)
        return None, message

    voice_short_name = voice.split(" - ")[0]
    # The ":+d" format spec rejects floats, and sliders may deliver 5.0
    # instead of 5, so normalise to int before formatting.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then close the handle before edge_tts writes to
    # it (an open handle would block the write on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    return tmp_path, None
# Text-to-video function
def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Render a video of *bg_media* with the spoken *text* as its audio track.

    Args:
        text, voice, rate, pitch: Forwarded unchanged to ``text_to_speech``.
        bg_media: Path of the uploaded background file, or ``None`` when
            nothing was uploaded. Files with a known video extension are
            opened as video; anything else is opened as a still image.
        video_width: Output width in pixels.
        video_height: Output height in pixels.

    Returns:
        ``(video_path, None)`` on success, or ``(None, warning)`` when the
        input was rejected (missing background, or speech synthesis returned
        no audio file).
    """
    # Validate the local input first so that no TTS request is made (and no
    # temporary audio file is created) for a request that cannot succeed.
    if bg_media is None:
        message = "Please upload a background image or video."
        gr.Warning(message)
        return None, message

    # Generate audio from text. Test the audio path rather than the warning
    # value: a failed synthesis is signalled by a missing path even when the
    # accompanying warning object is falsy.
    audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
    if audio is None:
        return None, warning

    # Case-insensitive match so ".MP4" and other common containers are not
    # mistakenly opened as still images.
    video_extensions = (".mp4", ".mov", ".avi", ".mkv", ".webm", ".m4v")
    is_video = bg_media.lower().endswith(video_extensions)
    # Sliders may deliver floats; a frame size is made of whole pixels.
    frame_size = (int(video_width), int(video_height))

    audio_clip = None
    bg_source = None
    try:
        audio_clip = AudioFileClip(audio)
        if is_video:
            bg_source = VideoFileClip(bg_media)
            bg_clip = bg_source.resize(newsize=frame_size).set_duration(audio_clip.duration)
        else:
            bg_source = ImageClip(bg_media)
            bg_clip = bg_source.set_duration(audio_clip.duration).resize(newsize=frame_size)

        # Set audio for the background
        final_video = bg_clip.set_audio(audio_clip)

        # A unique output path per call: a fixed file name in the temp dir
        # would let concurrent requests overwrite each other's result.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as out_file:
            final_video_path = out_file.name
        final_video.write_videofile(final_video_path, fps=24, codec="libx264")
    finally:
        # Release the file readers held by the source clips.
        for clip in (bg_source, audio_clip):
            if clip is not None:
                clip.close()
        # The intermediate speech file is not returned to the caller, so
        # remove it; this cleanup is best-effort and must not mask a
        # rendering error.
        try:
            os.remove(audio)
        except OSError:
            pass
    return final_video_path, None
# Gradio interface function
def tts_interface(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Gradio callback: render the video and map it onto the three outputs.

    Returns a ``(audio, video, warning)`` tuple matching the interface's
    output components; the audio slot is always ``None``.
    """
    rendered = text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height)
    video_path, notice = rendered
    return (None, video_path, notice)
# Create Gradio app
async def create_demo():
    """Build the Gradio interface for the text-to-speech/video app.

    This is a coroutine because the voice list for the dropdown is fetched
    with ``await get_voices()`` before the interface is constructed.

    Returns:
        The configured ``gr.Interface``; it is not launched here.
    """
    voices = await get_voices()
    demo = gr.Interface(
        fn=tts_interface,
        # Order must match the positional parameters of ``tts_interface``.
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # The leading empty choice lets the form start with no voice selected.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
            gr.File(label="Upload Background Image or Video", type="filepath"),
            gr.Slider(minimum=640, maximum=1920, value=1080, label="Video Width", step=10),
            gr.Slider(minimum=480, maximum=1080, value=720, label="Video Height", step=10),
        ],
        # Order must match the tuple returned by ``tts_interface``.
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Video(label="Generated Video"),
            gr.Markdown(label="Warning", visible=False),
        ],
        title="Edge TTS Text to Speech and Video",
        description="Convert text to speech and video using Microsoft Edge TTS. Upload an image or video for the background.",
        analytics_enabled=False,
        # Gradio documents this option as a string ("never"/"manual"/"auto");
        # the boolean form is legacy and may be rejected by newer releases.
        allow_flagging="never",
    )
    return demo
# Run the application
if __name__ == "__main__":
    # create_demo() is a coroutine (it awaits the voice list), so it is driven
    # to completion with asyncio.run before the blocking launch call.
    demo = asyncio.run(create_demo())
    demo.launch(share=True)  # Set share=True for public link
|