import torch._dynamo
torch._dynamo.config.suppress_errors = True
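# Suppressing Dynamo errors makes torch.compile fall back to eager execution
# instead of raising, which keeps the Space alive if compilation fails.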
import torch
import gradio as gr
import os
import base64
from glob import glob
from pathlib import Path
from typing import Optional
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from PIL import Image
import uuid
import random
from huggingface_hub import login, hf_hub_download
import spaces
model_directory = './checkpoints'

# Best-effort pre-download of the single-file checkpoint; a failure here is
# non-fatal because the pipeline below can still fetch weights on its own.
try:
    hf_hub_download(
        repo_id="vdo/stable-video-diffusion-img2vid-xt-1-1",
        filename="svd_xt_1_1.safetensors",
        local_dir=model_directory,
        cache_dir=model_directory,
    )
except Exception as error:
    print(error)
# pipe = StableVideoDiffusionPipeline.from_pretrained(
# # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
# "vdo/stable-video-diffusion-img2vid-xt-1-1",
# torch_dtype=torch.float16,
# variant="fp16"
# )
# pipe.save_pretrained("./checkpoints", variant="fp16")
if not os.path.exists(model_directory):
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
        "vdo/stable-video-diffusion-img2vid-xt-1-1",
        torch_dtype=torch.float16,
        variant="fp16"
    )
    pipe.save_pretrained("./checkpoints", variant="fp16")
else:
    try:
        pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_directory,
            torch_dtype=torch.float16,
            variant="fp16"
        )
    except Exception:
        # The local checkpoint is missing or unreadable: re-download and re-save it.
        pipe = StableVideoDiffusionPipeline.from_pretrained(
            # "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
            "vdo/stable-video-diffusion-img2vid-xt-1-1",
            torch_dtype=torch.float16,
            variant="fp16"
        )
        pipe.save_pretrained("./checkpoints", variant="fp16")
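# Optional memory savers (an assumption, not part of the original Space): on
# smaller GPUs the diffusers pipeline can trade speed for VRAM, for example:
# pipe.enable_model_cpu_offload()      # keep only the active submodule on the GPU
# pipe.unet.enable_forward_chunking()  # chunk the UNet's temporal attention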
# device = "cuda" if torch.cuda.is_available() else "cpu"
# pipe.to(device)
# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
# pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
max_64_bit_int = 2**63 - 1
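# Upper bound for the manual seed slider below. Note that "randomize seed"
# draws a 32-bit seed (0..2**31 - 1), while manual seeds may use the full
# 64-bit range accepted by torch.manual_seed.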
@spaces.GPU(enable_queue=True, duration=240)
def generate_video(
    image: Image.Image,
    randomize_seed: bool = True,
    seed: int = 42,  # default added: a non-default argument may not follow a defaulted one
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = "outputs",
):
    global pipe
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe.to(device)
    # note julian: normally we should resize input images, but normally they are already in 1024x576, so..
    # also, I would like to experiment with vertical videos, and 1024x512 videos
    image = resize_image(image)
    if image.mode == "RGBA":
        image = image.convert("RGB")
    if randomize_seed:
        seed = random.randint(0, 2147483647)
    generator = torch.manual_seed(seed)
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
    # pipe.to("cuda")
    frames = pipe(
        image,
        decode_chunk_size=decoding_t,
        generator=generator,
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=0.1,
        num_frames=25,
    ).frames[0]
    export_to_video(frames, video_path, fps=fps_id)
    torch.manual_seed(seed)
    # Read the content of the video file and encode it to base64
    # with open(video_path, "rb") as video_file:
    #     video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
    # Prepend the appropriate data URI header with MIME type
    # video_data_uri = 'data:video/mp4;base64,' + video_base64
    # clean-up (otherwise there is a risk of "ghosting", e.g. someone seeing the
    # previously generated video if one of the steps goes wrong)
    # os.remove(video_path)
    # return video_data_uri
    return video_path
def resize_image(image, output_size=(1024, 576)):
    # Calculate aspect ratios
    target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
    image_aspect = image.width / image.height  # Aspect ratio of the original image

    # Resize then crop if the original image is larger
    if image_aspect > target_aspect:
        # Resize the image to match the target height, maintaining aspect ratio
        new_height = output_size[1]
        new_width = int(new_height * image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        # Calculate coordinates for cropping
        left = (new_width - output_size[0]) / 2
        top = 0
        right = (new_width + output_size[0]) / 2
        bottom = output_size[1]
    else:
        # Resize the image to match the target width, maintaining aspect ratio
        new_width = output_size[0]
        new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        # Calculate coordinates for cropping
        left = 0
        top = (new_height - output_size[1]) / 2
        right = output_size[0]
        bottom = (new_height + output_size[1]) / 2

    # Crop the image
    cropped_image = resized_image.crop((left, top, right, bottom))
    return cropped_image
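# Usage sketch (assumption: any RGB PIL image is acceptable as input):
# frame = resize_image(Image.open("photo.jpg"))  # -> 1024x576, center-cropped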
css = """
img, video {
max-height: 400px;
object-fit: contain;
}
video {
margin: 0 auto
}
"""
with gr.Blocks(css=css) as SVD_XT_1_1:
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Upload your image", type="pil")
            generate_btn = gr.Button("Generate")
            # base64_out = gr.Textbox(label="Base64 Video")
            randomize_seed = gr.Checkbox(label="\U0001F3B2 Randomize seed", value=True, info="If checked, result is always different")
            seed = gr.Slider(label="Seed", value=42, randomize=False, minimum=0, maximum=max_64_bit_int, step=1)
            motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
            fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)
        with gr.Column():
            video_out = gr.Video(
                autoplay=True,
                # height=512,
                # width=512,
                # elem_id="video_output"
            )
    generate_btn.click(
        fn=generate_video,
        inputs=[image, randomize_seed, seed, motion_bucket_id, fps_id],
        outputs=video_out,
        api_name="run"
    )

SVD_XT_1_1.launch()
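# Because the click handler sets api_name="run", the endpoint can also be
# called programmatically. A sketch (assumes the gradio_client package and a
# locally launched app on the default port):
# from gradio_client import Client, handle_file
# client = Client("http://127.0.0.1:7860")
# video_path = client.predict(
#     handle_file("input.png"),  # image
#     True,                      # randomize_seed
#     42,                        # seed
#     127,                       # motion_bucket_id
#     6,                         # fps_id
#     api_name="/run",
# )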