Spaces: Running on Zero
Update app.py (#2)
- Update app.py (8ee2c9ea0139ee192904a9254cb86c15a5ce7781)
app.py
CHANGED
@@ -96,12 +96,13 @@ def construct_video_pipeline(model_id: str, lora_path: str):
     pipe.unload_lora_weights()
 
     return pipe
+
 def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
     cond_frame1 = Image.open(frame1_path)
     cond_frame2 = Image.open(frame2_path)
 
-    height, width = 720,
+    height, width = 720, 1280
     cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
     cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
 
@@ -110,11 +111,6 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
     cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
     cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
 
-    # Initialize pipeline
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft") # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     with torch.no_grad():
         image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
         image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous() # [B, F, C, H, W] -> [B, C, F, H, W]
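Note: with this hunk, generate_video no longer constructs the model inside the request path; it expects a ready-made pipe. A minimal sketch of the intended call pattern, assuming the pipeline is built once at startup (the model id and LoRA file come from the removed lines; the prompt, frame paths, and step counts below are purely illustrative):

    # Sketch only: build the pipeline once, reuse it for every generation request.
    from huggingface_hub import hf_hub_download

    model_id = "hunyuanvideo-community/HunyuanVideo"
    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")
    pipe = construct_video_pipeline(model_id, lora_path)

    # Each call reuses the same pipe instead of re-downloading and re-loading the LoRA.
    video_bytes = generate_video(
        pipe,
        prompt="a smooth transition between the two keyframes",  # illustrative
        frame1_path="frame1.png",                                # illustrative
        frame2_path="frame2.png",                                # illustrative
        guidance_scale=6.0,
        num_frames=49,
        num_inference_steps=30,
    )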
@@ -144,54 +140,7 @@ def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidan
         video_bytes = video_file.read()
 
     return video_bytes
-
-    # # Load and preprocess frames
-    # cond_frame1 = Image.open(requests.get(frame1_url, stream=True).raw)
-    # cond_frame2 = Image.open(requests.get(frame2_url, stream=True).raw)
-
-    # height, width = 720, 1280
-    # cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
-    # cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
-
-    # cond_video = np.zeros(shape=(num_frames, height, width, 3))
-    # cond_video[0], cond_video[-1] = np.array(cond_frame1), np.array(cond_frame2)
-    # cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
-    # cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
-
-    # # Initialize pipeline
-    # model_id = "hunyuanvideo-community/HunyuanVideo"
-    # lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft") # Replace with the actual LORA path
-    # pipe = construct_video_pipeline(model_id, lora_path)
-
-    # with torch.no_grad():
-    #     image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
-    #     image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous() # [B, F, C, H, W] -> [B, C, F, H, W]
-    #     cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
-    #     cond_latents = cond_latents * pipe.vae.config.scaling_factor
-    #     cond_latents = cond_latents.to(dtype=pipe.dtype)
-    #     assert not torch.any(torch.isnan(cond_latents))
-
-    # # Generate video
-    # video = call_pipe(
-    #     pipe,
-    #     prompt=prompt,
-    #     num_frames=num_frames,
-    #     num_inference_steps=num_inference_steps,
-    #     image_latents=cond_latents,
-    #     width=width,
-    #     height=height,
-    #     guidance_scale=guidance_scale,
-    #     generator=torch.Generator(device="cuda").manual_seed(0),
-    # ).frames[0]
-
-    # # Export to video
-    # video_path = "output.mp4"
-    # export_to_video(video, video_path, fps=24)
-
-    # with open(video_path, "rb") as video_file:
-    #     video_bytes = video_file.read()
-
-    # return video_bytes
+
 
 @torch.inference_mode()
 def call_pipe(
@@ -199,9 +148,9 @@ def call_pipe(
     prompt: Union[str, List[str]] = None,
     prompt_2: Union[str, List[str]] = None,
     height: int = 720,
-    width: int =
+    width: int = 1280,
     num_frames: int = 129,
-    num_inference_steps: int =
+    num_inference_steps: int = 50,
     sigmas: Optional[List[float]] = None,
     guidance_scale: float = 6.0,
     num_videos_per_prompt: Optional[int] = 1,
@@ -268,7 +217,7 @@ def call_pipe(
 
     # 4. Prepare timesteps
     sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1] if sigmas is None else sigmas
-    timesteps, num_inference_steps =
+    timesteps, num_inference_steps = retrieve_timesteps(
         pipe.scheduler,
         num_inference_steps,
         device,
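Note: the schedule is now resolved through retrieve_timesteps, the diffusers helper that wraps scheduler.set_timesteps(...) and returns the resolved timesteps together with the (possibly adjusted) step count. A minimal sketch of the call in context; the import path is an assumption (the helper is also commonly copied directly into app code):

    # Sketch: derive the flow-matching schedule from explicit sigmas.
    from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import retrieve_timesteps

    sigmas = np.linspace(1.0, 0.0, num_inference_steps + 1)[:-1]
    timesteps, num_inference_steps = retrieve_timesteps(
        pipe.scheduler,
        num_inference_steps,
        device,
        sigmas=sigmas,
    )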
@@ -345,11 +294,8 @@ def call_pipe(
         return (video,)
     return HunyuanVideoPipelineOutput(frames=video)
 
+
 def main():
-    model_id = "hunyuanvideo-community/HunyuanVideo"
-    lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft") # Replace with the actual LORA path
-    pipe = construct_video_pipeline(model_id, lora_path)
-
     gr.Markdown(
         """
        - https://i-bacon.bunkr.ru/11b45aa7-630b-4189-996f-a6b37a697786.png
@@ -371,9 +317,7 @@ def main():
     outputs = [
         gr.Video(label="Generated Video"),
     ]
-
-        return generate_video(pipe, *args)
-
+
     # Create the Gradio interface
     iface = gr.Interface(
         fn=generate_video_wrapper,
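Note: pipeline construction also drops out of main(), while the interface still points at generate_video_wrapper. One way such a wrapper can bridge the Gradio inputs to generate_video once pipe lives at module scope is sketched below; the wrapper body and the temporary-file handling are assumptions for illustration, not the actual app.py code:

    import tempfile

    def generate_video_wrapper(prompt, frame1_path, frame2_path,
                               guidance_scale, num_frames, num_inference_steps):
        # Sketch: reuse the module-level pipe and return a file path gr.Video can serve.
        video_bytes = generate_video(pipe, prompt, frame1_path, frame2_path,
                                     guidance_scale, int(num_frames), int(num_inference_steps))
        out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        out.write(video_bytes)
        out.close()
        return out.name

Since the Space runs on ZeroGPU, the GPU-bound function would typically also carry the spaces.GPU decorator; that detail is not visible in this diff.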