Spaces:
Runtime error
Runtime error
File size: 5,301 Bytes
aa2b83c cb5a657 aa2b83c cb5a657 18ed7af cb5a657 321b7b2 2927d16 cb5a657 321b7b2 cb5a657 ebdbb36 cb5a657 46f7940 cb5a657 321b7b2 cb5a657 18ed7af cb5a657 321b7b2 cb5a657 aa2b83c 18365d1 321b7b2 18365d1 aa2b83c 321b7b2 cb5a657 18365d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import torch._dynamo
torch._dynamo.config.suppress_errors = True
import torch
import gradio as gr
import os
import base64
from glob import glob
from pathlib import Path
from typing import Optional
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from PIL import Image
import uuid
import random
from huggingface_hub import login, hf_hub_download
import spaces
# pipe = StableVideoDiffusionPipeline.from_pretrained(
# # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
# "vdo/stable-video-diffusion-img2vid-xt-1-1",
# torch_dtype=torch.float16,
# variant="fp16"
# )
# pipe.save_pretrained("./checkpoints", variant="fp16")
# Local directory used as an on-disk cache for the pipeline weights.
model_directory = './checkpoints'

# Decide once where the weights come from: the local cache when it already
# exists, otherwise the Hub repository.
# (alternative upstream repo: "stabilityai/stable-video-diffusion-img2vid-xt-1-1")
_checkpoint_cached = os.path.exists(model_directory)
_model_source = (
    model_directory
    if _checkpoint_cached
    else "vdo/stable-video-diffusion-img2vid-xt-1-1"
)

pipe = StableVideoDiffusionPipeline.from_pretrained(
    _model_source,
    torch_dtype=torch.float16,
    variant="fp16",
)

# First run only: persist the freshly fetched weights so that later starts
# load them from model_directory.
if not _checkpoint_cached:
    pipe.save_pretrained(model_directory, variant="fp16")

pipe.to("cuda")

# torch.compile is intentionally left disabled:
# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
# pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)

# Upper bound offered by the seed slider in the UI.
max_64_bit_int = 2**63 - 1
@spaces.GPU(duration=180)
def generate_video(
    image: Image.Image,
    seed: int,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = "outputs",
):
    """Generate a 25-frame video from a still image and return it as a data URI.

    The image is resized/cropped with ``resize_image``, converted to RGB if
    needed, run through the module-level ``pipe``, exported to a temporary
    ``.mp4`` inside ``output_folder``, read back, base64-encoded, and the
    temporary file is removed again.

    Args:
        image: Input picture (PIL image).
        seed: Seed for the torch random generator; coerced to ``int``.
        motion_bucket_id: Forwarded to the pipeline as ``motion_bucket_id``.
        fps_id: Frame rate passed to ``export_to_video``.
        version: Not used by this function; kept for interface compatibility.
        cond_aug: Not used by this function (``noise_aug_strength`` is fixed
            at 0.1 below); kept for interface compatibility.
        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
        device: Not used by this function; kept for interface compatibility.
        output_folder: Directory in which the temporary video file is written.

    Returns:
        A ``data:video/mp4;base64,...`` string containing the encoded video.
    """
    # note julian: normally we should resize input images, but normally they are already in 1024x576, so..
    # also, I would like to experiment with vertical videos, and 1024x512 videos
    image = resize_image(image)

    # Normalise every non-RGB mode (RGBA, P, L, LA, ...), not just RGBA.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # UI sliders may deliver the seed as a float; torch.manual_seed needs an int.
    seed = int(seed)
    generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    # A random name avoids collisions between concurrent requests; counting the
    # existing files is not unique because every file is deleted after use.
    video_path = os.path.join(output_folder, f"{uuid.uuid4().hex}.mp4")

    try:
        frames = pipe(
            image,
            decode_chunk_size=decoding_t,
            generator=generator,
            motion_bucket_id=motion_bucket_id,
            noise_aug_strength=0.1,
            num_frames=25,
        ).frames[0]
        export_to_video(frames, video_path, fps=fps_id)
        torch.manual_seed(seed)

        # Read the content of the video file and encode it to base64
        with open(video_path, "rb") as video_file:
            video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
    finally:
        # clean-up even when a step fails (otherwise there is a risk of
        # "ghosting", e.g. someone seeing a previously generated video)
        if os.path.exists(video_path):
            os.remove(video_path)

    # Prepend the appropriate data URI header with MIME type
    return 'data:video/mp4;base64,' + video_base64
def resize_image(image, output_size=(1024, 576)):
    """Scale ``image`` to cover ``output_size`` and crop the centred overflow.

    The picture is resized with LANCZOS resampling, keeping its aspect ratio,
    so that one dimension equals the target and the other is at least as
    large; the surplus along that other dimension is then cropped evenly from
    both sides.

    Args:
        image: Object exposing ``width``, ``height`` and ``resize`` (a PIL
            image in this file).
        output_size: ``(width, height)`` of the result.

    Returns:
        The cropped result of ``resize(...).crop(...)``.
    """
    target_w, target_h = output_size[0], output_size[1]
    wanted_ratio = target_w / target_h
    source_ratio = image.width / image.height

    if source_ratio > wanted_ratio:
        # Source is relatively wider: match the height, trim left and right.
        scaled_h = target_h
        scaled_w = int(scaled_h * source_ratio)
        box = (
            (scaled_w - target_w) / 2,
            0,
            (scaled_w + target_w) / 2,
            target_h,
        )
    else:
        # Source is relatively taller (or equal): match the width, trim top
        # and bottom.
        scaled_w = target_w
        scaled_h = int(scaled_w / source_ratio)
        box = (
            0,
            (scaled_h - target_h) / 2,
            target_w,
            (scaled_h + target_h) / 2,
        )

    scaled = image.resize((scaled_w, scaled_h), Image.LANCZOS)
    return scaled.crop(box)
# Limit the rendered height of image/video elements in the page.
css = """
img, video {
max-height: 400px;
object-fit: contain;
}
"""

# UI definition: components are created in display order, then the button is
# wired to generate_video and exposed through the API under the name "run".
with gr.Blocks(css=css) as demo:
    image = gr.Image(label="Upload your image", type="pil")
    generate_btn = gr.Button("Generate")
    base64_out = gr.Textbox(label="Base64 Video")
    seed = gr.Slider(
        label="Seed",
        value=42,
        randomize=False,
        minimum=0,
        maximum=max_64_bit_int,
        step=1,
    )
    motion_bucket_id = gr.Slider(
        label="Motion bucket id",
        info="Controls how much motion to add/remove from the image",
        value=127,
        minimum=1,
        maximum=255,
    )
    fps_id = gr.Slider(
        label="Frames per second",
        info="The length of your video in seconds will be 25/fps",
        value=6,
        minimum=5,
        maximum=30,
    )

    # The inputs map positionally onto the first four parameters of
    # generate_video; the returned data URI is shown in the textbox.
    generate_btn.click(
        fn=generate_video,
        inputs=[image, seed, motion_bucket_id, fps_id],
        outputs=base64_out,
        api_name="run",
    )

demo.launch()