LPX55 committed
Commit 867e3ec · verified · 1 Parent(s): d1fc296

Update app.py

Files changed (1)
  1. app.py +8 -12
app.py CHANGED
@@ -96,20 +96,21 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
     return image
 
 
-def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
+def generate_video(pipe, prompt: str, frame1: Image.Image, frame2: Image.Image, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
-    cond_frame1 = Image.open(frame1_path)
-    cond_frame2 = Image.open(frame2_path)
+    print(f"Frame 1: {frame1}, Type: {type(frame1)}")
+    print(f"Frame 2: {frame2}, Type: {type(frame2)}")
 
+    # Load and preprocess frames
+    cond_frame1 = frame1
+    cond_frame2 = frame2
     height, width = 720, 1280
     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-
     cond_video = np.zeros(shape=(num_frames, height, width, 3))
     cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)
     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
-
     with torch.no_grad():
         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
@@ -117,7 +118,6 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
     cond_latents = cond_latents * pipe.vae.config.scaling_factor
     cond_latents = cond_latents.to(dtype=pipe.dtype)
     assert not torch.any(torch.isnan(cond_latents))
-
     # Generate video
     video = call_pipe(
         pipe,
@@ -130,17 +130,13 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
-
     # Export to video
     video_path = "output.mp4"
     export_to_video(video, video_path, fps=24)
-
     with open(video_path, "rb") as video_file:
         video_bytes = video_file.read()
-
     return video_bytes
 
-
 @torch.inference_mode()
 def call_pipe(
     pipe,
@@ -303,8 +299,8 @@ def main():
     # Define the interface inputs
     inputs = [
         gr.Textbox(label="Prompt", value="a woman"),
-        gr.Image(label="Frame 1", type="filepath"),
-        gr.Image(label="Frame 2", type="filepath"),
+        gr.Image(label="Frame 1", type="pil"),
+        gr.Image(label="Frame 2", type="pil"),
         # gr.Textbox(label="Frame 1 URL", value="https://i-bacon.bunkr.ru/11b45aa7-630b-4189-996f-a6b37a697786.png"),
         # gr.Textbox(label="Frame 2 URL", value="https://i-bacon.bunkr.ru/2382224f-120e-482d-a75d-f1a1bf13038c.png"),
         gr.Slider(minimum=0.1, maximum=20, step=0.1, label="Guidance Scale", value=6.0),
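
For orientation: the commit only changes how the two frames arrive (PIL images instead of file paths); the conditioning tensor is still built by writing the frames into the first and last slots of an all-zero video. A minimal standalone sketch of that step, using dummy frames and an assumed video_transforms (its real definition sits outside this diff; it is taken here to be a scale-to-[-1, 1] normalization):

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

# Assumption: video_transforms maps uint8-range frames to [-1, 1]; the actual
# definition lives elsewhere in app.py and is not part of this diff.
video_transforms = transforms.Compose([
    transforms.Lambda(lambda x: x / 255.0),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

num_frames, height, width = 49, 720, 1280  # 49 is arbitrary for this sketch
cond_frame1 = Image.new("RGB", (width, height), "red")   # stand-in for frame 1
cond_frame2 = Image.new("RGB", (width, height), "blue")  # stand-in for frame 2

# Only the first and last frames carry pixels; every in-between frame stays zero.
cond_video = np.zeros(shape=(num_frames, height, width, 3))
cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)

# [F, H, W, C] -> [F, C, H, W], then normalize each frame and add a batch dim.
cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
print(cond_video.shape)  # torch.Size([1, 49, 3, 720, 1280])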
 
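The encode call that actually produces cond_latents falls between the first two hunks and is elided from this diff. As a rough, hedged illustration of the usual diffusers pattern it follows (shown with a stand-in 2D image VAE, not the pipeline's video VAE):

import torch
from diffusers import AutoencoderKL

# Stand-in: a plain image autoencoder, since the real encode line is not
# visible in this diff; the scaling step mirrors the one the diff does show.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
x = torch.randn(1, 3, 256, 256)  # dummy batch, values roughly in [-1, 1]

with torch.no_grad():
    latents = vae.encode(x).latent_dist.sample()
    latents = latents * vae.config.scaling_factor  # same scaling as in the diff
print(latents.shape)  # torch.Size([1, 4, 32, 32])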
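The interface change in the last hunk is the other half of the fix: with type="pil", Gradio hands the callback a PIL.Image directly instead of a temp-file path, which is what the new frame1: Image.Image signature expects. A minimal sketch of the difference (show_size is a hypothetical handler, not from app.py):

import gradio as gr
from PIL import Image

def show_size(img: Image.Image) -> str:
    # With type="pil" the input is already a PIL.Image, so no Image.open() is
    # needed; with type="filepath" this argument would be a str path instead.
    return f"{type(img).__name__}: {img.size[0]}x{img.size[1]}"

demo = gr.Interface(
    fn=show_size,
    inputs=gr.Image(label="Frame", type="pil"),
    outputs=gr.Textbox(label="Info"),
)

if __name__ == "__main__":
    demo.launch()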