Update pipeline.py
Browse files- pipeline.py +50 -0
pipeline.py
CHANGED
@@ -543,6 +543,44 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
543 |
latents = latents * self.scheduler.init_noise_sigma
|
544 |
return latents
|
545 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
def prepare_motion_latents(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator,
|
547 |
latents=None, x_velocity=0, y_velocity=0, scale_velocity=0):
|
548 |
shape = (
|
@@ -947,6 +985,18 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
947 |
device,
|
948 |
generator,
|
949 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
950 |
|
951 |
|
952 |
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
|
|
543 |
latents = latents * self.scheduler.init_noise_sigma
|
544 |
return latents
|
545 |
|
def prepare_latents_consistent(
    self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
    """Prepare initial latents whose frames are temporally smoothed.

    Draws standard noise of shape (batch, channels, frames, h, w), then replaces
    each frame with a weighted blend of its neighbouring frames (wrapping at the
    ends) to encourage temporal consistency, and finally scales by the
    scheduler's ``init_noise_sigma``.

    Args:
        batch_size: Effective batch size (videos * prompts).
        num_channels_latents: Latent channel count expected by the UNet.
        num_frames: Number of video frames.
        height, width: Target pixel dimensions; divided by ``self.vae_scale_factor``.
        dtype: Latent dtype.
        device: Target device.
        generator: ``torch.Generator`` or list of generators (one per batch item).
        latents: Optional pre-made latents; if given, blending is skipped and they
            are only moved to ``device`` and scaled.

    Returns:
        Latent tensor scaled by ``self.scheduler.init_noise_sigma``.

    Raises:
        ValueError: If a list of generators has a length other than ``batch_size``.
    """
    shape = (
        batch_size,
        num_channels_latents,
        num_frames,
        height // self.vae_scale_factor,
        width // self.vae_scale_factor,
    )
    if isinstance(generator, list) and len(generator) != batch_size:
        raise ValueError(
            f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
            f" size of {batch_size}. Make sure the batch size matches the length of the generators."
        )

    if latents is None:
        noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

        # Blend each frame with its surrounding `smooth_steps` neighbours,
        # wrapping around at the ends. All neighbours are read from the
        # original, unmodified noise (`noise`) and written into a separate
        # output tensor: blending in place would make the result
        # order-dependent (later frames would blend already-smoothed frames,
        # and the wrap-around would re-read mutated frame 0).
        smooth_steps = 3
        blended = torch.empty_like(noise)
        for i in range(num_frames):
            acc = torch.zeros_like(noise[:, :, i])
            for s in range(-smooth_steps, smooth_steps + 1):
                if s == 0:
                    # The frame itself is deliberately excluded from the blend.
                    continue
                frame_index = (i + s) % num_frames
                # Triangular weighting: nearest neighbours weigh most,
                # |s| == smooth_steps contributes weight 0.
                weight = (smooth_steps - abs(s)) / smooth_steps
                acc += noise[:, :, frame_index] * weight
            blended[:, :, i] = acc / (2 * smooth_steps)
        # NOTE(review): the blended frames no longer have unit variance
        # (neighbour weights sum to 2 before the /6 division and frames are
        # correlated) — presumably acceptable for this "consistent" mode,
        # but worth confirming against the scheduler's expectations.
        latents = blended
    else:
        latents = latents.to(device)

    # Scale the initial noise by the standard deviation required by the scheduler.
    latents = latents * self.scheduler.init_noise_sigma
    return latents
584 |
def prepare_motion_latents(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator,
|
585 |
latents=None, x_velocity=0, y_velocity=0, scale_velocity=0):
|
586 |
shape = (
|
|
|
985 |
device,
|
986 |
generator,
|
987 |
)
|
988 |
+
elif(latent_mode == "consistent"):
|
989 |
+
latents = self.prepare_latents_consistent(
|
990 |
+
batch_size * num_videos_per_prompt,
|
991 |
+
num_channels_latents,
|
992 |
+
num_frames,
|
993 |
+
height,
|
994 |
+
width,
|
995 |
+
prompt_embeds.dtype,
|
996 |
+
device,
|
997 |
+
generator,
|
998 |
+
latents,
|
999 |
+
)
|
1000 |
|
1001 |
|
1002 |
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|