TextToSpeech

Running

File size: 2,061 Bytes

9e41260
513a56f
9e41260
513a56f
 
 
 
 
4eca143
513a56f
909d366
513a56f
 
c54507d
513a56f
909d366
513a56f
 
 
909d366
513a56f
 
 
909d366
f5591d6
513a56f
 
909d366
513a56f
 
4eca143
513a56f
4eca143
513a56f

import gradio as gr
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip

# Function to generate audio from text (placeholder)
def generate_audio(text, voice, rate, pitch):
    """Generate speech audio for ``text`` (placeholder, not implemented yet).

    The text-to-speech backend has not been wired in. Failing loudly here
    is deliberate: silently returning ``None`` would only surface later as
    an unrelated-looking ``AttributeError`` when the caller reads the
    clip's ``duration``.

    Args:
        text: Text to synthesize.
        voice: Voice identifier to speak with.
        rate: Speaking-rate setting.
        pitch: Pitch setting.

    Returns:
        Once implemented, an audio clip exposing ``duration`` (the caller
        expects something like a moviepy ``AudioFileClip``).

    Raises:
        NotImplementedError: Always, until real synthesis logic is added.
    """
    raise NotImplementedError(
        "generate_audio() is a placeholder; implement text-to-speech "
        "synthesis here and return an audio clip."
    )

# Lower-case file extensions that select each kind of background clip.
_VIDEO_EXTENSIONS = ('.mp4',)
_IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')
# A still image carries no frame rate, so one must be supplied when encoding.
_DEFAULT_FPS = 24


# Function to create video from text and background media
def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Render ``text`` as speech over a background image or video.

    The background is trimmed/extended to the duration of the generated
    audio, the audio is attached, and the result is encoded with libx264
    to ``output_video.mp4`` in the current working directory.

    Args:
        text: Text to synthesize.
        voice: Voice identifier forwarded to ``generate_audio``.
        rate: Speaking-rate setting forwarded to ``generate_audio``.
        pitch: Pitch setting forwarded to ``generate_audio``.
        bg_media: Path to the background file, or an upload object that
            exposes its path as ``.name``. ``.mp4`` is treated as video;
            ``.jpg``/``.jpeg``/``.png`` as a still image. Matching is
            case-insensitive.
        video_width: Currently unused; accepted for interface compatibility.
        video_height: Currently unused; accepted for interface compatibility.

    Returns:
        A ``(output_path, warning)`` tuple. On success ``warning`` is
        ``None``; on a recoverable problem ``output_path`` is ``None`` and
        ``warning`` is a human-readable message.
    """
    if bg_media is None:
        return None, "No background media provided."

    # Accept both a plain path and an upload wrapper that carries the path
    # in its ``name`` attribute.
    media_path = str(getattr(bg_media, "name", bg_media))
    lowered_path = media_path.lower()
    is_video = lowered_path.endswith(_VIDEO_EXTENSIONS)
    is_image = lowered_path.endswith(_IMAGE_EXTENSIONS)
    if not (is_video or is_image):
        # Reject before doing any audio work.
        return None, "Unsupported media type."

    # Generate the audio clip
    audio_clip = generate_audio(text, voice, rate, pitch)
    if audio_clip is None:
        return None, "Audio generation failed."

    # Set the final output video file name
    output_file = "output_video.mp4"

    bg_clip = None
    try:
        bg_clip = VideoFileClip(media_path) if is_video else ImageClip(media_path)

        # Match the background length to the narration, then attach the audio.
        final_video = bg_clip.set_duration(audio_clip.duration).set_audio(audio_clip)

        # Video clips bring their own fps; image clips do not, and the
        # encoder refuses to run without one.
        fps = getattr(final_video, "fps", None) or _DEFAULT_FPS

        # Write the final video to a file
        final_video.write_videofile(output_file, codec='libx264', fps=fps)
    finally:
        # Release the underlying readers even if encoding fails.
        if bg_clip is not None:
            bg_clip.close()
        audio_clip.close()

    return output_file, None

# Gradio interface
def tts_interface(text, voice, rate, pitch, bg_media):
    """Gradio callback: build the narrated video and return its file path.

    Args:
        text: Text to synthesize.
        voice: Voice chosen in the UI.
        rate: Speaking-rate slider value.
        pitch: Pitch slider value.
        bg_media: Uploaded background image/video, either as a path string
            or as an upload object exposing its path as ``.name``.

    Returns:
        Path of the rendered video file.

    Raises:
        gr.Error: If no background was uploaded or ``text_to_video``
            reports a warning. The interface output is a file component,
            so returning the message text would be treated as a file path
            rather than shown to the user.
    """
    if bg_media is None:
        raise gr.Error("Please upload a background image or video.")

    # Normalise an upload wrapper to the plain path text_to_video expects.
    media_path = getattr(bg_media, "name", bg_media)

    video, warning = text_to_video(text, voice, rate, pitch, media_path, None, None)
    if warning:
        raise gr.Error(warning)
    return video

# Web UI definition: five input widgets feed tts_interface, whose result is
# exposed through a single file output.
iface = gr.Interface(
    title="Text to Video with Audio",
    description="Upload an image or video and generate a video with audio from text.",
    fn=tts_interface,
    inputs=[
        gr.Textbox(label="Text"),
        # Placeholder voice names; swap in the voices the TTS backend offers.
        gr.Dropdown(choices=["Voice 1", "Voice 2"], label="Voice"),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Rate"),
        gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Pitch"),
        gr.File(label="Background Media (Image/Video)"),
    ],
    outputs="file",
)

# Start serving the app; share=True requests a public share link from Gradio.
iface.launch(share=True)