import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image

# Load the image-to-video pipeline in half precision on the GPU.
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
pipe = StableVideoDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
)
pipe = pipe.to("cuda")


def generate_video(image: Image.Image) -> str:
    # SVD is conditioned on the input image alone; it takes no text prompt.
    # Resize to the resolution the model was trained on (1024x576).
    image = image.resize((1024, 576))

    # Generate the video frames. decode_chunk_size trades VRAM for speed,
    # and num_inference_steps=25 is the pipeline default. .frames is batched,
    # so take the first (and only) batch entry.
    frames = pipe(image, num_inference_steps=25, decode_chunk_size=8).frames[0]

    # Write the frames to an MP4 file that Gradio can display.
    video_path = "output_video.mp4"
    export_to_video(frames, video_path, fps=7)
    return video_path


# Create the Gradio interface. The model accepts only an image input,
# so no prompt textbox is needed.
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Video(label="Generated Video"),
    title="Stable Video Diffusion img2vid-xt",
    description=(
        "Generate a video from an image using the "
        "stabilityai/stable-video-diffusion-img2vid-xt model."
    ),
)

# Launch the interface.
if __name__ == "__main__":
    iface.launch()
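
# Usage (a sketch, assuming a CUDA GPU with enough VRAM for the fp16 model;
# the package list below is an assumption based on the imports above):
#   pip install gradio diffusers transformers accelerate torch
#   python app.py
# Gradio prints a local URL; open it and upload an image to generate a video.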