Update pipeline.py
Browse files- pipeline.py +50 -0
pipeline.py
CHANGED
@@ -543,6 +543,44 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
543 |
latents = latents * self.scheduler.init_noise_sigma
|
544 |
return latents
|
545 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
def prepare_motion_latents(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator,
|
547 |
latents=None, x_velocity=0, y_velocity=0, scale_velocity=0):
|
548 |
shape = (
|
@@ -947,6 +985,18 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
947 |
device,
|
948 |
generator,
|
949 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
950 |
|
951 |
|
952 |
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
|
|
543 |
latents = latents * self.scheduler.init_noise_sigma
|
544 |
return latents
|
545 |
|
def prepare_latents_consistent(
    self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
    """Prepare initial latents whose frames are temporally smoothed.

    Draws standard noise of shape (batch, channels, frames, h, w), then replaces
    each frame with a weighted blend of its neighbouring frames (wrapping at the
    ends) to encourage temporal consistency, and finally scales by the
    scheduler's ``init_noise_sigma``.

    Args:
        batch_size: Effective batch size (videos * prompts).
        num_channels_latents: Latent channel count expected by the UNet.
        num_frames: Number of video frames.
        height, width: Target pixel dimensions; divided by ``self.vae_scale_factor``.
        dtype: Latent dtype.
        device: Target device.
        generator: ``torch.Generator`` or list of generators (one per batch item).
        latents: Optional pre-made latents; if given, blending is skipped and they
            are only moved to ``device`` and scaled.

    Returns:
        Latent tensor scaled by ``self.scheduler.init_noise_sigma``.

    Raises:
        ValueError: If a list of generators has a length other than ``batch_size``.
    """
    shape = (
        batch_size,
        num_channels_latents,
        num_frames,
        height // self.vae_scale_factor,
        width // self.vae_scale_factor,
    )
    if isinstance(generator, list) and len(generator) != batch_size:
        raise ValueError(
            f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
            f" size of {batch_size}. Make sure the batch size matches the length of the generators."
        )

    if latents is None:
        noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

        # Blend each frame with its surrounding `smooth_steps` neighbours,
        # wrapping around at the ends. All neighbours are read from the
        # original, unmodified noise (`noise`) and written into a separate
        # output tensor: blending in place would make the result
        # order-dependent (later frames would blend already-smoothed frames,
        # and the wrap-around would re-read mutated frame 0).
        smooth_steps = 3
        blended = torch.empty_like(noise)
        for i in range(num_frames):
            acc = torch.zeros_like(noise[:, :, i])
            for s in range(-smooth_steps, smooth_steps + 1):
                if s == 0:
                    # The frame itself is deliberately excluded from the blend.
                    continue
                frame_index = (i + s) % num_frames
                # Triangular weighting: nearest neighbours weigh most,
                # |s| == smooth_steps contributes weight 0.
                weight = (smooth_steps - abs(s)) / smooth_steps
                acc += noise[:, :, frame_index] * weight
            blended[:, :, i] = acc / (2 * smooth_steps)
        # NOTE(review): the blended frames no longer have unit variance
        # (neighbour weights sum to 2 before the /6 division and frames are
        # correlated) — presumably acceptable for this "consistent" mode,
        # but worth confirming against the scheduler's expectations.
        latents = blended
    else:
        latents = latents.to(device)

    # Scale the initial noise by the standard deviation required by the scheduler.
    latents = latents * self.scheduler.init_noise_sigma
    return latents
584 |
def prepare_motion_latents(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator,
|
585 |
latents=None, x_velocity=0, y_velocity=0, scale_velocity=0):
|
586 |
shape = (
|
|
|
985 |
device,
|
986 |
generator,
|
987 |
)
|
988 |
+
elif(latent_mode == "consistent"):
|
989 |
+
latents = self.prepare_latents_consistent(
|
990 |
+
batch_size * num_videos_per_prompt,
|
991 |
+
num_channels_latents,
|
992 |
+
num_frames,
|
993 |
+
height,
|
994 |
+
width,
|
995 |
+
prompt_embeds.dtype,
|
996 |
+
device,
|
997 |
+
generator,
|
998 |
+
latents,
|
999 |
+
)
|
1000 |
|
1001 |
|
1002 |
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|