smoothieAI
/

pipeline_animatediff_context

Model card Files Files and versions Community

smoothieAI committed on Jan 15, 2024

Commit

2c12498

verified ·

1 Parent(s): 1f86862

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +22 -31

pipeline.py CHANGED Viewed

@@ -602,14 +602,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
             p = init_noise_correlation
             flattened_latents = torch.flatten(cloned_latents[:, :, i])
             prev_flattened_latents = torch.flatten(cloned_latents[:, :, i - 1])
-            correlated_latents = (
-                prev_flattened_latents * p/math.sqrt((1+p**2))
-                +
-                flattened_latents * math.sqrt(1/(1 + p**2))
-            )
-            cloned_latents[:, :, i] = correlated_latents.reshape(
-                cloned_latents[:, :, i].shape
-            )
         return cloned_latents
@@ -699,9 +693,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                         init_image_strength * (len(self.scheduler.timesteps) - 1)
                     )
                     noise = torch.randn_like(init_latents)
-                    noise = self.generate_correlated_latents(
-                        noise, init_noise_correlation
-                    )
                     # Eric - some black magic here
                     # We should be only adding the noise at timestep[offset], but I noticed that
@@ -924,25 +916,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         #     generator,
         #     latents,
         # )
-        # latents = self.prepare_motion_latents(
-        #     batch_size * num_videos_per_prompt,
-        #     num_channels_latents,
-        #     num_frames,
-        #     height,
-        #     width,
-        #     prompt_embeds.dtype,
-        #     device,
-        #     generator,
-        #     latents,
-        #     x_velocity=x_velocity,
-        #     y_velocity=y_velocity,
-        #     scale_velocity=scale_velocity,
-        # )
-        latents, init_latents = self.prepare_correlated_latents(
-            init_image,
-            init_image_strength,
-            init_noise_correlation,
-            batch_size,
             num_channels_latents,
             num_frames,
             height,
@@ -950,8 +925,24 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
             prompt_embeds.dtype,
             device,
             generator,
         )
-        print(type(latents), hasattr(latents, 'shape') and latents.shape)
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

             p = init_noise_correlation
             flattened_latents = torch.flatten(cloned_latents[:, :, i])
             prev_flattened_latents = torch.flatten(cloned_latents[:, :, i - 1])
+            correlated_latents = (prev_flattened_latents * p/math.sqrt((1+p**2))+flattened_latents * math.sqrt(1/(1 + p**2)))
+            cloned_latents[:, :, i] = correlated_latents.reshape(cloned_latents[:, :, i].shape)
         return cloned_latents
                         init_image_strength * (len(self.scheduler.timesteps) - 1)
                     )
                     noise = torch.randn_like(init_latents)
+                    noise = self.generate_correlated_latents(noise, init_noise_correlation)
                     # Eric - some black magic here
                     # We should be only adding the noise at timestep[offset], but I noticed that
         #     generator,
         #     latents,
         # )
+        latents = self.prepare_motion_latents(
+            batch_size * num_videos_per_prompt,
             num_channels_latents,
             num_frames,
             height,
             prompt_embeds.dtype,
             device,
             generator,
+            latents,
+            x_velocity=x_velocity,
+            y_velocity=y_velocity,
+            scale_velocity=scale_velocity,
         )
+        # latents, init_latents = self.prepare_correlated_latents(
+        #     init_image,
+        #     init_image_strength,
+        #     init_noise_correlation,
+        #     batch_size,
+        #     num_channels_latents,
+        #     num_frames,
+        #     height,
+        #     width,
+        #     prompt_embeds.dtype,
+        #     device,
+        #     generator,
+        # )
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline