LPX55 committed
Commit c8d124f · verified · 1 Parent(s): 51d25bc

Update app.py

Files changed (1):
  app.py +20 -21
app.py CHANGED
@@ -102,31 +102,27 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
 
 @spaces.GPU(duration=120)
 def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolution: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
-    # Debugging print statements
-    print(f"Frame 1 Type: {type(frame1)}")
-    print(f"Frame 2 Type: {type(frame2)}")
-    print(f"Resolution: {resolution}")
-
-    # Parse resolution
     width, height = map(int, resolution.split('x'))
-
-    # Load and preprocess frames
-    cond_frame1 = np.array(frame1)
-    cond_frame2 = np.array(frame2)
-    cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
-    cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-    cond_video = np.zeros(shape=(num_frames, height, width, 3))
-    cond_video[0], cond_video[-1] = cond_frame1, cond_frame2
-    cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
-    cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Resize((height, width), antialias=True),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+    ])
+
+    cond_frame1 = transform(frame1).cuda()  # Move to GPU immediately
+    cond_frame2 = transform(frame2).cuda()
+    cond_video = torch.zeros(num_frames, 3, height, width, device='cuda', dtype=pipe.dtype)
+    cond_video[0] = cond_frame1
+    cond_video[-1] = cond_frame2
+
     with torch.no_grad():
-        image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
-        image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
+        image_or_video = cond_video.unsqueeze(0)
         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
-    # Generate video
+
     video = call_pipe(
         pipe,
         prompt=prompt,
@@ -138,10 +134,13 @@ def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolu
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
-    # Export to video
+
     video_path = "output.mp4"
-    # video_bytes = io.BytesIO()
     export_to_video(video, video_path, fps=24)
+    del cond_video  # Manual deletion
+    del cond_frame1  # Manual deletion
+    del cond_frame2  # Manual deletion
+    del image_or_video  # Manual deletion
     torch.cuda.empty_cache()
     return video_path
 
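For reference, a minimal self-contained sketch of the new preprocessing path (the `preprocess_endpoint_frames` name, the `torch.float16` default, and the standalone signature are illustrative stand-ins, not part of the app). It builds the conditioning clip entirely on the GPU in the pipeline's dtype, with only the first and last frames populated, which avoids the float64 NumPy buffer the old `np.zeros` path allocated on the CPU. One caveat: the pre-commit code permuted the batch to `[B, C, F, H, W]` before `pipe.vae.encode`, while the committed version only unsqueezes to `[B, F, C, H, W]`; the sketch keeps the permute.

```python
import torch
from PIL import Image
from torchvision import transforms

def preprocess_endpoint_frames(
    frame1: Image.Image,
    frame2: Image.Image,
    num_frames: int,
    width: int,
    height: int,
    dtype: torch.dtype = torch.float16,  # stand-in for pipe.dtype
    device: str = "cuda",
) -> torch.Tensor:
    """Return a [B, C, F, H, W] conditioning tensor whose first and last
    frames are the two inputs and whose intermediate frames are zeros."""
    transform = transforms.Compose([
        transforms.ToTensor(),                      # PIL -> float [C, H, W] in [0, 1]
        transforms.Resize((height, width), antialias=True),
        transforms.Normalize(mean=[0.5, 0.5, 0.5],  # [0, 1] -> [-1, 1]
                             std=[0.5, 0.5, 0.5]),
    ])
    cond_video = torch.zeros(num_frames, 3, height, width, device=device, dtype=dtype)
    cond_video[0] = transform(frame1).to(device=device, dtype=dtype)
    cond_video[-1] = transform(frame2).to(device=device, dtype=dtype)
    # [F, C, H, W] -> [B, F, C, H, W] -> [B, C, F, H, W], matching the layout
    # the pre-commit code fed to pipe.vae.encode.
    return cond_video.unsqueeze(0).permute(0, 2, 1, 3, 4).contiguous()
```

On the cleanup side, the added `del` statements drop the last Python references to the conditioning tensors before `torch.cuda.empty_cache()` runs; `empty_cache()` only returns allocator blocks that are no longer referenced, so deleting first is what lets the cached memory actually be released between Space invocations.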