File size: 2,061 Bytes
9e41260
513a56f
9e41260
513a56f
 
 
 
 
4eca143
513a56f
909d366
513a56f
 
c54507d
513a56f
909d366
513a56f
 
 
909d366
513a56f
 
 
909d366
f5591d6
513a56f
 
909d366
513a56f
 
4eca143
513a56f
4eca143
513a56f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip

# Function to generate audio from text (placeholder)
def generate_audio(text, voice, rate, pitch):
    """Placeholder for the text-to-speech step.

    No audio is synthesized yet: all four arguments are ignored and the
    function returns ``None``. ``text_to_video`` reads ``.duration`` from the
    result and attaches it to a clip, so a real implementation is expected to
    return an audio clip object (e.g. an ``AudioFileClip``).
    """
    # TODO: implement the actual audio generation logic.
    return None

# Function to create video from text and background media
def text_to_video(text, voice, rate, pitch, bg_media, video_width, video_height):
    """Render narration for ``text`` over a background image or video.

    Args:
        text, voice, rate, pitch: Forwarded unchanged to ``generate_audio``.
        bg_media: Background file, given as a path (``str`` or path-like) or
            as an object exposing its path as ``.name`` (presumably what the
            ``gr.File`` input hands over in some Gradio versions; verify).
        video_width, video_height: Accepted for interface compatibility;
            currently unused.

    Returns:
        ``(output_path, None)`` on success, or ``(None, message)`` when the
        input is missing or unsupported, or no audio could be generated.
    """
    import os

    default_image_fps = 24

    # Resolve the upload to a plain path string before inspecting it.
    if isinstance(bg_media, (str, os.PathLike)):
        media_path = os.fspath(bg_media)
    else:
        media_path = getattr(bg_media, "name", None)
    if not isinstance(media_path, str) or not media_path:
        return None, "No background media provided."

    # Validate the media type first (case-insensitively) so an invalid upload
    # is rejected before any audio generation work is done.
    lowered = media_path.lower()
    if lowered.endswith('.mp4'):
        is_video = True
    elif lowered.endswith(('.jpg', '.png', '.jpeg')):
        is_video = False
    else:
        return None, "Unsupported media type."

    # Generate the audio clip
    audio_clip = generate_audio(text, voice, rate, pitch)
    if audio_clip is None:
        # Without this guard a missing clip fails on `.duration` below.
        return None, "Audio generation failed."

    # Set the final output video file name
    output_file = "output_video.mp4"

    source_clip = None
    try:
        source_clip = VideoFileClip(media_path) if is_video else ImageClip(media_path)
        bg_clip = source_clip.set_duration(audio_clip.duration)

        # Create a final video with audio
        final_video = bg_clip.set_audio(audio_clip)

        # A still image carries no frame rate, but the writer needs one;
        # video clips keep their own fps.
        fps = getattr(final_video, "fps", None) or default_image_fps

        # Write the final video to a file
        final_video.write_videofile(output_file, codec='libx264', fps=fps)
    finally:
        # Release readers/subprocesses held by the clips, even on failure.
        for clip in (source_clip, audio_clip):
            closer = getattr(clip, "close", None)
            if callable(closer):
                closer()

    return output_file, None

# Gradio interface
def tts_interface(text, voice, rate, pitch, bg_media):
    """Gradio callback: build the video and return the path of the result.

    Calls ``text_to_video`` with no explicit output size.

    Returns:
        Path of the rendered video file.

    Raises:
        gr.Error: If ``text_to_video`` reports a warning. The interface's
            output is declared as a file, so returning the message string
            would hand it to the file output as if it were a path instead of
            showing it to the user.
    """
    video, warning = text_to_video(text, voice, rate, pitch, bg_media, None, None)
    if warning:
        raise gr.Error(warning)
    return video

# Input widgets, listed in the same order as tts_interface's parameters.
_input_components = [
    gr.Textbox(label="Text"),
    gr.Dropdown(label="Voice", choices=["Voice 1", "Voice 2"]),  # Update with actual voices
    gr.Slider(label="Rate", minimum=0.5, maximum=2.0, step=0.1, value=1.0),
    gr.Slider(label="Pitch", minimum=0, maximum=100, step=1, value=50),
    gr.File(label="Background Media (Image/Video)"),
]

# Wire the callback to its inputs; the single output is a downloadable file.
iface = gr.Interface(
    fn=tts_interface,
    inputs=_input_components,
    outputs="file",
    title="Text to Video with Audio",
    description="Upload an image or video and generate a video with audio from text.",
)

# Start the app as soon as the module runs, requesting a public share link.
iface.launch(share=True)