Update pipeline.py
Browse files- pipeline.py +22 -31
pipeline.py
CHANGED
@@ -602,14 +602,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
602 |
p = init_noise_correlation
|
603 |
flattened_latents = torch.flatten(cloned_latents[:, :, i])
|
604 |
prev_flattened_latents = torch.flatten(cloned_latents[:, :, i - 1])
|
605 |
-
correlated_latents = (
|
606 |
-
|
607 |
-
+
|
608 |
-
flattened_latents * math.sqrt(1/(1 + p**2))
|
609 |
-
)
|
610 |
-
cloned_latents[:, :, i] = correlated_latents.reshape(
|
611 |
-
cloned_latents[:, :, i].shape
|
612 |
-
)
|
613 |
|
614 |
return cloned_latents
|
615 |
|
@@ -699,9 +693,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
699 |
init_image_strength * (len(self.scheduler.timesteps) - 1)
|
700 |
)
|
701 |
noise = torch.randn_like(init_latents)
|
702 |
-
noise = self.generate_correlated_latents(
|
703 |
-
noise, init_noise_correlation
|
704 |
-
)
|
705 |
|
706 |
# Eric - some black magic here
|
707 |
# We should be only adding the noise at timestep[offset], but I noticed that
|
@@ -924,25 +916,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
924 |
# generator,
|
925 |
# latents,
|
926 |
# )
|
927 |
-
|
928 |
-
|
929 |
-
# num_channels_latents,
|
930 |
-
# num_frames,
|
931 |
-
# height,
|
932 |
-
# width,
|
933 |
-
# prompt_embeds.dtype,
|
934 |
-
# device,
|
935 |
-
# generator,
|
936 |
-
# latents,
|
937 |
-
# x_velocity=x_velocity,
|
938 |
-
# y_velocity=y_velocity,
|
939 |
-
# scale_velocity=scale_velocity,
|
940 |
-
# )
|
941 |
-
latents, init_latents = self.prepare_correlated_latents(
|
942 |
-
init_image,
|
943 |
-
init_image_strength,
|
944 |
-
init_noise_correlation,
|
945 |
-
batch_size,
|
946 |
num_channels_latents,
|
947 |
num_frames,
|
948 |
height,
|
@@ -950,8 +925,24 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
950 |
prompt_embeds.dtype,
|
951 |
device,
|
952 |
generator,
|
|
|
|
|
|
|
|
|
953 |
)
|
954 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
955 |
|
956 |
|
957 |
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
|
|
602 |
p = init_noise_correlation
|
603 |
flattened_latents = torch.flatten(cloned_latents[:, :, i])
|
604 |
prev_flattened_latents = torch.flatten(cloned_latents[:, :, i - 1])
|
605 |
+
correlated_latents = (prev_flattened_latents * p/math.sqrt((1+p**2))+flattened_latents * math.sqrt(1/(1 + p**2)))
|
606 |
+
cloned_latents[:, :, i] = correlated_latents.reshape(cloned_latents[:, :, i].shape)
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
|
608 |
return cloned_latents
|
609 |
|
|
|
693 |
init_image_strength * (len(self.scheduler.timesteps) - 1)
|
694 |
)
|
695 |
noise = torch.randn_like(init_latents)
|
696 |
+
noise = self.generate_correlated_latents(noise, init_noise_correlation)
|
|
|
|
|
697 |
|
698 |
# Eric - some black magic here
|
699 |
# We should be only adding the noise at timestep[offset], but I noticed that
|
|
|
916 |
# generator,
|
917 |
# latents,
|
918 |
# )
|
919 |
+
latents = self.prepare_motion_latents(
|
920 |
+
batch_size * num_videos_per_prompt,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
921 |
num_channels_latents,
|
922 |
num_frames,
|
923 |
height,
|
|
|
925 |
prompt_embeds.dtype,
|
926 |
device,
|
927 |
generator,
|
928 |
+
latents,
|
929 |
+
x_velocity=x_velocity,
|
930 |
+
y_velocity=y_velocity,
|
931 |
+
scale_velocity=scale_velocity,
|
932 |
)
|
933 |
+
# latents, init_latents = self.prepare_correlated_latents(
|
934 |
+
# init_image,
|
935 |
+
# init_image_strength,
|
936 |
+
# init_noise_correlation,
|
937 |
+
# batch_size,
|
938 |
+
# num_channels_latents,
|
939 |
+
# num_frames,
|
940 |
+
# height,
|
941 |
+
# width,
|
942 |
+
# prompt_embeds.dtype,
|
943 |
+
# device,
|
944 |
+
# generator,
|
945 |
+
# )
|
946 |
|
947 |
|
948 |
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|