CharlieAmalet committed on
Commit
cb5a657
·
verified ·
1 Parent(s): 9d07490

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import os
import random
import uuid
from glob import glob
from pathlib import Path
from typing import Optional

import gradio as gr
#import gradio.helpers
import spaces  # was `import space`; the module this file uses is `spaces` (see `@spaces.GPU`)
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from huggingface_hub import login, hf_hub_download
from PIL import Image
18
+
19
+ #gradio.helpers.CACHED_FOLDER = '/data/cache'
20
+
21
+ # SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
22
+
23
+ # HF_API_KEY = os.getenv('HF_API_KEY', '')
24
+ # login(token=HF_API_KEY)
25
+
26
# Load the half-precision Stable Video Diffusion checkpoint once, at import
# time, and place it on the GPU so every request reuses the same pipeline.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
    variant="fp16",
    torch_dtype=torch.float16,
).to("cuda")

# Only the UNet is compiled; compiling the VAE is left disabled below.
pipe.unet = torch.compile(pipe.unet, fullgraph=True, mode="reduce-overhead")
#pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)

# Largest signed 64-bit integer; used as the upper bound of the seed slider.
max_64_bit_int = (1 << 63) - 1
36
+
37
@spaces.GPU(enable_queue=True)
def generate_video(
    image: Image.Image,
    seed: int,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = "outputs",
    secret_token: Optional[str] = None,
):
    """Generate a 25-frame video from a still image and return it as a data URI.

    The positional order (image, seed, motion_bucket_id, fps_id) matches the
    `inputs` list wired to the "Generate" button; the previous signature had a
    leading `secret_token`, which shifted every UI value by one argument.

    Args:
        image: Conditioning image (the UI supplies a PIL image). It is resized
            and center-cropped by `resize_image`, and RGBA is converted to RGB.
        seed: Seed passed to `torch.manual_seed`.
        motion_bucket_id: Forwarded to the pipeline as `motion_bucket_id`.
        fps_id: Frame rate used when writing the MP4.
        version: Unused.
        cond_aug: Unused -- the pipeline call hard-codes `noise_aug_strength=0.1`.
        decoding_t: Forwarded to the pipeline as `decode_chunk_size`.
        device: Unused; the pipeline is moved to its device at import time.
        output_folder: Directory for the temporary MP4 (created if missing).
        secret_token: Unused. The token check is disabled; the parameter is
            kept as a trailing keyword so keyword callers keep working.

    Returns:
        A `data:video/mp4;base64,...` string containing the encoded video.
    """
    # note julian: normally we should resize input images, but normally they are already in 1024x576, so..
    # also, I would like to experiment with vertical videos, and 1024x512 videos
    image = resize_image(image)

    if image.mode == "RGBA":
        image = image.convert("RGB")

    generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    # A random name (rather than "number of existing .mp4 files") cannot
    # collide with another request writing into the same folder.
    video_path = os.path.join(output_folder, f"{uuid.uuid4().hex}.mp4")

    try:
        frames = pipe(
            image,
            decode_chunk_size=decoding_t,
            generator=generator,
            motion_bucket_id=motion_bucket_id,
            noise_aug_strength=0.1,  # NOTE(review): `cond_aug` is not wired in here -- confirm intent
            num_frames=25,
        ).frames[0]
        export_to_video(frames, video_path, fps=fps_id)

        # Read the content of the video file and encode it to base64
        with open(video_path, "rb") as video_file:
            video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
    finally:
        # clean-up (otherwise there is a risk of "ghosting", e.g. someone seeing
        # the previously generated video if one of the steps goes wrong)
        if os.path.exists(video_path):
            os.remove(video_path)

    # Prepend the appropriate data URI header with MIME type
    return "data:video/mp4;base64," + video_base64
84
+
85
def resize_image(image, output_size=(1024, 576)):
    """Scale `image` so it covers `output_size`, then center-crop to that size.

    The aspect ratio is preserved while scaling: an image wider than the target
    is matched on height and loses its left/right edges, anything else is
    matched on width and loses its top/bottom edges.

    Args:
        image: Object exposing `width`, `height` and `resize` (a PIL image).
        output_size: `(width, height)` of the returned image.

    Returns:
        The resized and cropped image.
    """
    target_w = output_size[0]
    target_h = output_size[1]
    source_ratio = image.width / image.height

    if source_ratio > target_w / target_h:
        # Wider than the target: fit the height, trim the sides equally.
        scaled_h = target_h
        scaled_w = int(scaled_h * source_ratio)
        crop_box = ((scaled_w - target_w) / 2, 0, (scaled_w + target_w) / 2, target_h)
    else:
        # Taller than (or equal to) the target: fit the width, trim top and bottom equally.
        scaled_w = target_w
        scaled_h = int(scaled_w / source_ratio)
        crop_box = (0, (scaled_h - target_h) / 2, target_w, (scaled_h + target_h) / 2)

    return image.resize((scaled_w, scaled_h), Image.LANCZOS).crop(crop_box)
115
+
116
# Minimal UI: a full-page overlay explains that the space is meant to be used
# as an API, while the components underneath define the "run" endpoint.
with gr.Blocks() as demo:
    # secret_token = gr.Text(
    #     label='Secret Token',
    #     max_lines=1,
    #     placeholder='Enter your secret token')
    gr.HTML(
        "\n"
        '<div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">\n'
        '<div style="text-align: center; color: black;">\n'
        '<p style="color: black;">This space is a REST API to programmatically generate MP4 videos.</p>\n'
        '<p style="color: black;">Interested in using it? Look no further than the <a href="https://huggingface.co/spaces/multimodalart/stable-video-diffusion" target="_blank">original space</a>!</p>\n'
        "</div>\n"
        "</div>"
    )
    image = gr.Image(type="pil", label="Upload your image")
    generate_btn = gr.Button("Generate")
    base64_out = gr.Textbox(label="Base64 Video")
    seed = gr.Slider(
        label="Seed",
        value=42,
        randomize=False,
        minimum=0,
        maximum=max_64_bit_int,
        step=1,
    )
    motion_bucket_id = gr.Slider(
        label="Motion bucket id",
        info="Controls how much motion to add/remove from the image",
        value=127,
        minimum=1,
        maximum=255,
    )
    fps_id = gr.Slider(
        label="Frames per second",
        info="The length of your video in seconds will be 25/fps",
        value=6,
        minimum=5,
        maximum=30,
    )

    # The button click is also exposed as the "run" API endpoint.
    generate_btn.click(
        fn=generate_video,
        # inputs=[secret_token, image, seed, motion_bucket_id, fps_id],
        inputs=[image, seed, motion_bucket_id, fps_id],
        outputs=base64_out,
        api_name="run",
    )

# Allow at most 20 queued requests, then start the server.
demo.queue(max_size=20)
demo.launch()