import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image

# Load the image-to-video pipeline in half precision on the GPU.
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
pipe = StableVideoDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
)
pipe = pipe.to("cuda")


def generate_video(image: Image.Image) -> str:
    # SVD is conditioned on the input image alone; it takes no text prompt.
    # Resize to the resolution the model was trained on (1024x576).
    image = image.resize((1024, 576))

    # Generate the video frames. decode_chunk_size trades VRAM for speed,
    # and num_inference_steps=25 is the pipeline default. .frames is batched,
    # so take the first (and only) batch entry.
    frames = pipe(image, num_inference_steps=25, decode_chunk_size=8).frames[0]

    # Write the frames to an MP4 file that Gradio can display.
    video_path = "output_video.mp4"
    export_to_video(frames, video_path, fps=7)
    return video_path


# Create the Gradio interface. The model accepts only an image input,
# so no prompt textbox is needed.
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Video(label="Generated Video"),
    title="Stable Video Diffusion img2vid-xt",
    description=(
        "Generate a video from an image using the "
        "stabilityai/stable-video-diffusion-img2vid-xt model."
    ),
)

# Launch the interface.
if __name__ == "__main__":
    iface.launch()
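
# Usage (a sketch, assuming a CUDA GPU with enough VRAM for the fp16 model;
# the package list below is an assumption based on the imports above):
#   pip install gradio diffusers transformers accelerate torch
#   python app.py
# Gradio prints a local URL; open it and upload an image to generate a video.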