LPX55 committed on
Commit 889c6e6 · verified · 1 Parent(s): 8d52ba2
Files changed (1):
  1. app.py +8 -64
app.py CHANGED
@@ -96,12 +96,13 @@ def construct_video_pipeline(model_id: str, lora_path: str):
     pipe.unload_lora_weights()
 
     return pipe
+
 def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
     cond_frame1 = Image.open(frame1_path)
     cond_frame2 = Image.open(frame2_path)
 
-    height, width = 720, 720
+    height, width = 720, 1280
     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
 
@@ -110,11 +111,6 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
 
-    # Initialize pipeline
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     with torch.no_grad():
         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
@@ -144,54 +140,7 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
         video_bytes = video_file.read()
 
     return video_bytes
-# def generate_video(prompt: str, frame1_url: str, frame2_url: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
-#     # Load and preprocess frames
-#     cond_frame1 = Image.open(requests.get(frame1_url, stream=True).raw)
-#     cond_frame2 = Image.open(requests.get(frame2_url, stream=True).raw)
-
-#     height, width = 720, 1280
-#     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
-#     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-
-#     cond_video = np.zeros(shape=(num_frames, height, width, 3))
-#     cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)
-#     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
-#     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
-
-#     # Initialize pipeline
-#     model_id = "hunyuanvideo-community/HunyuanVideo"
-#     lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-#     pipe = construct_video_pipeline(model_id, lora_path)
-
-#     with torch.no_grad():
-#         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
-#         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
-#         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
-#         cond_latents = cond_latents * pipe.vae.config.scaling_factor
-#         cond_latents = cond_latents.to(dtype=pipe.dtype)
-#         assert not torch.any(torch.isnan(cond_latents))
-
-#     # Generate video
-#     video = call_pipe(
-#         pipe,
-#         prompt=prompt,
-#         num_frames=num_frames,
-#         num_inference_steps=num_inference_steps,
-#         image_latents=cond_latents,
-#         width=width,
-#         height=height,
-#         guidance_scale=guidance_scale,
-#         generator=torch.Generator(device="cuda").manual_seed(0),
-#     ).frames[0]
-
-#     # Export to video
-#     video_path = "output.mp4"
-#     export_to_video(video, video_path, fps=24)
-
-#     with open(video_path, "rb") as video_file:
-#         video_bytes = video_file.read()
-
-#     return video_bytes
+
 
 @torch.inference_mode()
 def call_pipe(
@@ -199,9 +148,9 @@ def call_pipe(
     prompt: Union[str, List[str]] = None,
     prompt_2: Union[str, List[str]] = None,
     height: int = 720,
-    width: int = 720,
+    width: int = 1280,
     num_frames: int = 129,
-    num_inference_steps: int = 30,
+    num_inference_steps: int = 50,
     sigmas: Optional[List[float]] = None,
     guidance_scale: float = 6.0,
     num_videos_per_prompt: Optional[int] = 1,
@@ -268,7 +217,7 @@ def call_pipe(
 
     # 4. Prepare timesteps
     sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1] if sigmas is None else sigmas
-    timesteps, num_inference_steps = retrieve_timesteps(
+    timesteps, num_inference_steps = retrieve_timesteps(
         pipe.scheduler,
         num_inference_steps,
         device,
@@ -345,11 +294,8 @@ def main():
         return (video,)
     return HunyuanVideoPipelineOutput(frames=video)
 
+
 def main():
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     gr.Markdown(
         """
         - https://i-bacon.bunkr.ru/11b45aa7-630b-4189-996f-a6b37a697786.png
@@ -371,9 +317,7 @@ def main():
     outputs = [
         gr.Video(label="Generated Video"),
     ]
-    def generate_video_wrapper(*args):
-        return generate_video(pipe, *args)
-
+
     # Create the Gradio interface
     iface = gr.Interface(
         fn=generate_video_wrapper,
 
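Note: after this commit, generate_video() takes the pipeline as an argument instead of constructing it on every call, and main() no longer builds one either, yet gr.Interface still references generate_video_wrapper. Module-scope setup along the following lines is presumably expected elsewhere in app.py; this is a hypothetical sketch, not part of the diff:

from functools import partial

from huggingface_hub import hf_hub_download  # already imported by app.py

# Build the pipeline once at startup and reuse it across requests.
model_id = "hunyuanvideo-community/HunyuanVideo"
lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")
pipe = construct_video_pipeline(model_id, lora_path)

# Bind the shared pipe so the Gradio callback keeps its original signature.
generate_video_wrapper = partial(generate_video, pipe)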
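For reference, the sigma schedule that call_pipe builds when sigmas is None is a linear ramp of num_inference_steps values from 1.0 down to, but excluding, 0.0. A minimal check, assuming only NumPy:

import numpy as np

# Same construction as in call_pipe: num_inference_steps + 1 points from 1.0
# to 0.0, with the trailing 0.0 dropped by the [:-1] slice.
num_inference_steps = 4  # small value so the output is easy to read
sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1]
print(sigmas)  # [1.   0.75 0.5  0.25]

With the new default of num_inference_steps = 50, the ramp runs 1.00, 0.98, ..., 0.02.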