LPX55 committed
Commit 6747584 · verified · 1 Parent(s): c8d124f

Update app.py

Files changed (1)
  1. app.py +21 -20
app.py CHANGED
@@ -102,27 +102,31 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
 
 @spaces.GPU(duration=120)
 def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolution: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
+    # Debugging print statements
+    print(f"Frame 1 Type: {type(frame1)}")
+    print(f"Frame 2 Type: {type(frame2)}")
+    print(f"Resolution: {resolution}")
+
+    # Parse resolution
     width, height = map(int, resolution.split('x'))
-
-    transform = transforms.Compose([
-        transforms.ToTensor(),
-        transforms.Resize((height, width), antialias=True),
-        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-    ])
-
-    cond_frame1 = transform(frame1).cuda()  # Move to GPU immediately
-    cond_frame2 = transform(frame2).cuda()
-    cond_video = torch.zeros(num_frames, 3, height, width, device='cuda', dtype=pipe.dtype)
-    cond_video[0] = cond_frame1
-    cond_video[-1] = cond_frame2
-
+
+    # Load and preprocess frames
+    cond_frame1 = np.array(frame1)
+    cond_frame2 = np.array(frame2)
+    cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
+    cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
+    cond_video = np.zeros(shape=(num_frames, height, width, 3))
+    cond_video[0], cond_video[-1] = cond_frame1, cond_frame2
+    cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
+    cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
     with torch.no_grad():
-        image_or_video = cond_video.unsqueeze(0)
+        image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
+        image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
-
+        # Generate video
         video = call_pipe(
             pipe,
             prompt=prompt,
@@ -134,13 +138,10 @@ def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolu
             guidance_scale=guidance_scale,
             generator=torch.Generator(device="cuda").manual_seed(0),
         ).frames[0]
-
+    # Export to video
    video_path = "output.mp4"
+    # video_bytes = io.BytesIO()
     export_to_video(video, video_path, fps=24)
-    del cond_video  # Manual deletion
-    del cond_frame1  # Manual deletion
-    del cond_frame2  # Manual deletion
-    del image_or_video  # Manual deletion
     torch.cuda.empty_cache()
     return video_path
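
Note: the new preprocessing path calls a module-level video_transforms that is defined outside this hunk. A minimal sketch of a plausible definition, assuming it mirrors the Normalize(mean=0.5, std=0.5) step the removed transform applied (0-255 pixels scaled to [0, 1], then mapped to [-1, 1]); the actual definition in app.py may differ:

from torchvision import transforms

# Hypothetical per-frame transform for [C, H, W] tensors holding 0-255
# pixel values; an assumption, not taken from this commit.
video_transforms = transforms.Compose([
    transforms.Lambda(lambda x: x / 255.0),      # 0-255 -> [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5]),   # [0, 1] -> [-1, 1]
])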
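
The [B, F, C, H, W] -> [B, C, F, H, W] comment in the added code can be verified with a standalone shape trace of the new preprocessing (example dimensions only; video_transforms is shape-preserving, so it is skipped here, and no model is needed):

import numpy as np
import torch

num_frames, height, width = 49, 544, 960  # example values only

cond_video = np.zeros(shape=(num_frames, height, width, 3))            # [F, H, W, C]
cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)   # [F, C, H, W]
cond_video = cond_video.unsqueeze(0)                                   # [B, F, C, H, W]
image_or_video = cond_video.permute(0, 2, 1, 3, 4).contiguous()        # [B, C, F, H, W]
print(image_or_video.shape)  # torch.Size([1, 3, 49, 544, 960])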
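
For completeness, a hypothetical direct call of the updated entry point (in the Space it is presumably invoked through the UI; pipe and call_pipe must already be loaded, and every argument value below is made up for illustration):

from PIL import Image

video_path = generate_video(
    prompt="a smooth transition between the two keyframes",
    frame1=Image.open("start.png"),
    frame2=Image.open("end.png"),
    resolution="960x544",
    guidance_scale=6.0,
    num_frames=49,
    num_inference_steps=30,
)
print(video_path)  # output.mp4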