Update app.py

app.py
CHANGED
@@ -96,20 +96,21 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
     return image
 
 
-def generate_video(pipe, prompt: str,
+def generate_video(pipe, prompt: str, frame1: Image.Image, frame2: Image.Image, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
-
-
+    print(f"Frame 1: {frame1}, Type: {type(frame1)}")
+    print(f"Frame 2: {frame2}, Type: {type(frame2)}")
 
+    # Use the uploaded PIL frames directly
+    cond_frame1 = frame1
+    cond_frame2 = frame2
     height, width = 720, 1280
     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-
     cond_video = np.zeros(shape=(num_frames, height, width, 3))
     cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)
     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
-
     with torch.no_grad():
         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
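A note on the hunk above: only the first and last entries of the conditioning clip carry real image content; every intermediate frame stays zero (black), and the whole stack is normalized and reshaped for the pipeline. A minimal sketch of the tensor flow, assuming `video_transforms` (defined elsewhere in app.py) normalizes each frame to [-1, 1] and using an assumed 49-frame count:

    import numpy as np
    import torch
    from torchvision import transforms

    # Stand-ins for values the app gets from the UI; 49 frames is an assumption.
    num_frames, height, width = 49, 720, 1280
    video_transforms = transforms.Compose([
        transforms.Lambda(lambda x: x / 255.0),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    frame = np.zeros((height, width, 3), dtype=np.uint8)   # stand-in for a resized frame
    cond_video = np.zeros((num_frames, height, width, 3))  # black placeholder frames
    cond_video[0], cond_video[-1] = frame, frame           # only the endpoints are conditioned

    cond_video = torch.from_numpy(cond_video).permute(0, 3, 1, 2)        # [F, C, H, W]
    cond_video = torch.stack([video_transforms(x) for x in cond_video])  # per-frame normalize
    cond_video = cond_video.unsqueeze(0)                                 # [B, F, C, H, W]
    cond_video = cond_video.permute(0, 2, 1, 3, 4).contiguous()          # [B, C, F, H, W]
    print(cond_video.shape)  # torch.Size([1, 3, 49, 720, 1280])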
@@ -117,7 +118,6 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
-
     # Generate video
     video = call_pipe(
         pipe,
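The VAE encode call itself sits just outside this hunk's context. The `scaling_factor` multiply is the standard diffusers convention: KL-VAE latents are scaled before the denoiser sees them, and the assert guards against NaNs from the encoder. A hedged sketch of that pattern, with a hypothetical helper name and assuming an AutoencoderKL-style `pipe.vae`:

    import torch

    def encode_conditioning(pipe, image_or_video: torch.Tensor) -> torch.Tensor:
        # Hypothetical helper; illustrates the usual diffusers encode-and-scale
        # pattern rather than the exact line in app.py.
        with torch.no_grad():
            latents = pipe.vae.encode(image_or_video).latent_dist.sample()
            latents = latents * pipe.vae.config.scaling_factor  # scale before the denoiser
            latents = latents.to(dtype=pipe.dtype)
        assert not torch.any(torch.isnan(latents))
        return latents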
@@ -130,17 +130,13 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
-
     # Export to video
     video_path = "output.mp4"
     export_to_video(video, video_path, fps=24)
-
     with open(video_path, "rb") as video_file:
         video_bytes = video_file.read()
-
     return video_bytes
 
-
 @torch.inference_mode()
 def call_pipe(
     pipe,
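This hunk only strips blank lines around the export step. For reference, `export_to_video` is the diffusers utility that writes a list of frames to an MP4; reading the file back as bytes is what lets `generate_video` return raw video data. A minimal sketch with a stand-in `frames` list:

    from diffusers.utils import export_to_video
    from PIL import Image

    # `frames` is a stand-in for the pipeline output: a list of PIL images.
    frames = [Image.new("RGB", (1280, 720)) for _ in range(24)]

    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=24)  # writes an MP4 at 24 fps

    with open(video_path, "rb") as video_file:
        video_bytes = video_file.read()  # raw bytes, ready to hand back to the UI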
@@ -303,8 +299,8 @@ def main():
     # Define the interface inputs
     inputs = [
         gr.Textbox(label="Prompt", value="a woman"),
-        gr.Image(label="Frame 1", type="
-        gr.Image(label="Frame 2", type="
+        gr.Image(label="Frame 1", type="pil"),
+        gr.Image(label="Frame 2", type="pil"),
         # gr.Textbox(label="Frame 1 URL", value="https://i-bacon.bunkr.ru/11b45aa7-630b-4189-996f-a6b37a697786.png"),
         # gr.Textbox(label="Frame 2 URL", value="https://i-bacon.bunkr.ru/2382224f-120e-482d-a75d-f1a1bf13038c.png"),
         gr.Slider(minimum=0.1, maximum=20, step=0.1, label="Guidance Scale", value=6.0),
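For context on the `type="pil"` switch: `gr.Image(type=...)` controls what the event handler receives. `"pil"` yields a `PIL.Image.Image`, `"numpy"` an ndarray, and `"filepath"` a temp-file path, so the input type has to agree with the handler signature, which the old path-based names no longer did. A minimal sketch of the behavior with a plain `gr.Interface`:

    import gradio as gr
    from PIL import Image

    def check(img: Image.Image) -> str:
        # With type="pil", Gradio hands the handler a PIL.Image.Image directly.
        return f"{type(img).__name__}, size={img.size}"

    demo = gr.Interface(fn=check, inputs=gr.Image(type="pil"), outputs="text")
    # demo.launch()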