import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from PIL import Image
import spaces

# Load the Stable Video Diffusion image-to-video model in half precision
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
pipe = StableVideoDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

@spaces.GPU
def generate_video(image, prompt, num_frames=25, height=576, width=1024):
    # Load the uploaded image and match it to the requested resolution
    image = Image.open(image).convert("RGB").resize((int(width), int(height)))
    # SVD conditions on the input image only; the text prompt is not
    # supported by the pipeline, so it is accepted here but not passed on
    frames = pipe(
        image,
        num_frames=int(num_frames),
        height=int(height),
        width=int(width),
        decode_chunk_size=8,  # decode a few frames at a time to limit VRAM use
    ).frames[0]
    # Write the frames out as an MP4 so gr.Video can display them
    return export_to_video(frames, "generated.mp4", fps=7)

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image to Video with Stable Video Diffusion XT")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Image")
            # Note: SVD ignores the prompt; the box is kept for interface parity
            prompt_input = gr.Textbox(lines=2, placeholder="Enter prompt...", label="Prompt")
            num_frames_input = gr.Slider(1, 50, step=1, value=25, label="Number of Frames")
            height_input = gr.Number(label="Resolution Height", value=576)
            width_input = gr.Number(label="Resolution Width", value=1024)
            run_button = gr.Button("Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    run_button.click(
        generate_video,
        inputs=[image_input, prompt_input, num_frames_input, height_input, width_input],
        outputs=video_output,
    )

# Launch the interface
if __name__ == "__main__":
demo.launch()