Update pipeline.py
Browse files- pipeline.py +7 -2
pipeline.py
CHANGED
@@ -1407,6 +1407,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1407 |
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
1408 |
with self.progress_bar(total=len(timesteps)) as progress_bar:
|
1409 |
for i, t in enumerate(timesteps):
|
|
|
|
|
1410 |
noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1411 |
noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1412 |
latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
|
@@ -1424,7 +1426,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1424 |
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
1425 |
|
1426 |
|
1427 |
-
if self.controlnet != None
|
1428 |
contorl_start = time.time()
|
1429 |
|
1430 |
current_context_conditioning_frames = conditioning_frames[current_context_indexes, :, :, :]
|
@@ -1467,7 +1469,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1467 |
)
|
1468 |
print("controlnet time", time.time() - contorl_start)
|
1469 |
|
1470 |
-
|
1471 |
# predict the noise residual with the added controlnet residuals
|
1472 |
noise_pred = self.unet(
|
1473 |
latent_model_input,
|
@@ -1478,8 +1480,10 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1478 |
down_block_additional_residuals=down_block_res_samples,
|
1479 |
mid_block_additional_residual=mid_block_res_sample,
|
1480 |
).sample
|
|
|
1481 |
|
1482 |
else:
|
|
|
1483 |
# predict the noise residual without controlnet
|
1484 |
noise_pred = self.unet(
|
1485 |
latent_model_input,
|
@@ -1488,6 +1492,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1488 |
cross_attention_kwargs=cross_attention_kwargs,
|
1489 |
added_cond_kwargs=added_cond_kwargs,
|
1490 |
).sample
|
|
|
1491 |
|
1492 |
# sum the noise predictions for the unconditional and text conditioned noise
|
1493 |
if do_classifier_free_guidance:
|
|
|
1407 |
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
1408 |
with self.progress_bar(total=len(timesteps)) as progress_bar:
|
1409 |
for i, t in enumerate(timesteps):
|
1410 |
+
print("i", i)
|
1411 |
+
print("t", t)
|
1412 |
noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1413 |
noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1414 |
latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
|
|
|
1426 |
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
1427 |
|
1428 |
|
1429 |
+
if self.controlnet != None and i < 4:
|
1430 |
contorl_start = time.time()
|
1431 |
|
1432 |
current_context_conditioning_frames = conditioning_frames[current_context_indexes, :, :, :]
|
|
|
1469 |
)
|
1470 |
print("controlnet time", time.time() - contorl_start)
|
1471 |
|
1472 |
+
unet_start = time.time()
|
1473 |
# predict the noise residual with the added controlnet residuals
|
1474 |
noise_pred = self.unet(
|
1475 |
latent_model_input,
|
|
|
1480 |
down_block_additional_residuals=down_block_res_samples,
|
1481 |
mid_block_additional_residual=mid_block_res_sample,
|
1482 |
).sample
|
1483 |
+
print("unet time", time.time() - unet_start)
|
1484 |
|
1485 |
else:
|
1486 |
+
unet_start = time.time()
|
1487 |
# predict the noise residual without controlnet
|
1488 |
noise_pred = self.unet(
|
1489 |
latent_model_input,
|
|
|
1492 |
cross_attention_kwargs=cross_attention_kwargs,
|
1493 |
added_cond_kwargs=added_cond_kwargs,
|
1494 |
).sample
|
1495 |
+
print("unet time", time.time() - unet_start)
|
1496 |
|
1497 |
# sum the noise predictions for the unconditional and text conditioned noise
|
1498 |
if do_classifier_free_guidance:
|