Update pipeline.py

pipeline.py  CHANGED  (+59 -1)
@@ -539,6 +539,51 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         latents = latents * self.scheduler.init_noise_sigma
         return latents
 
+    def prepare_motion_latents(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator,
+                               latents=None, x_velocity=0, y_velocity=0, scale_velocity=0):
+        shape = (
+            batch_size,
+            num_channels_latents,
+            num_frames,
+            height // self.vae_scale_factor,
+            width // self.vae_scale_factor,
+        )
+        if isinstance(generator, list) and len(generator) != batch_size:
+            raise ValueError(
+                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+            )
+
+        if latents is None:
+            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+        else:
+            latents = latents.to(device)
+
+        # scale the initial noise by the standard deviation required by the scheduler
+        latents = latents * self.scheduler.init_noise_sigma
+
+        # Apply motion and scale dynamics
+        for frame in range(num_frames):
+            x_offset = int(frame * x_velocity)  # torch.roll requires integer shifts
+            y_offset = int(frame * y_velocity)
+            scale_factor = 1 + frame * scale_velocity
+
+            # Apply offsets
+            latents[:, :, frame] = torch.roll(latents[:, :, frame], shifts=x_offset, dims=3)  # x direction
+            latents[:, :, frame] = torch.roll(latents[:, :, frame], shifts=y_offset, dims=2)  # y direction
+
+            # Apply scaling (zoom-in only) - a simple approach that might not be ideal for all applications
+            if scale_factor > 1:
+                scaled_size = (int(latents.shape[3] * scale_factor), int(latents.shape[4] * scale_factor))
+                scaled = torch.nn.functional.interpolate(  # the per-frame slice is already 4D: (batch, channels, H, W)
+                    latents[:, :, frame], size=scaled_size, mode='bilinear', align_corners=False
+                )
+                top = (scaled_size[0] - latents.shape[3]) // 2   # center-crop the zoomed frame back
+                left = (scaled_size[1] - latents.shape[4]) // 2  # to the original latent resolution
+                latents[:, :, frame] = scaled[:, :, top:top + latents.shape[3], left:left + latents.shape[4]]
+
+        return latents
+
     @torch.no_grad()
     # @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
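For intuition, the motion that prepare_motion_latents injects is a per-frame circular shift of the initial noise: frame f is rolled by int(f * x_velocity) latent pixels horizontally and int(f * y_velocity) vertically, so later frames start from progressively displaced noise. Below is a minimal standalone sketch of that drift (the tensor shape and velocity values are toy choices for illustration, not taken from the commit):

    import torch

    # toy latent video: (batch, channels, frames, height, width)
    latents = torch.randn(1, 4, 16, 64, 64)
    x_velocity, y_velocity = 2, 1  # latent pixels of drift per frame (illustrative values)

    for frame in range(latents.shape[2]):
        x_offset = int(frame * x_velocity)
        y_offset = int(frame * y_velocity)
        # roll this frame's noise; wrap-around at the borders is inherent to torch.roll
        latents[:, :, frame] = torch.roll(latents[:, :, frame], shifts=x_offset, dims=3)  # x direction
        latents[:, :, frame] = torch.roll(latents[:, :, frame], shifts=y_offset, dims=2)  # y direction

    # frame 0 is unshifted; frame 15 has drifted 30 px right and 15 px down in latent space

The idea is that the denoiser tends to preserve this correlated drift across frames, which would appear as a gentle panning motion in the decoded video.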
@@ -695,7 +740,18 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
 
         # 5. Prepare latent variables
         num_channels_latents = self.unet.config.in_channels
-        latents = self.prepare_latents(
+        # latents = self.prepare_latents(
+        #     batch_size * num_videos_per_prompt,
+        #     num_channels_latents,
+        #     num_frames,
+        #     height,
+        #     width,
+        #     prompt_embeds.dtype,
+        #     device,
+        #     generator,
+        #     latents,
+        # )
+        latents = self.prepare_motion_latents(
             batch_size * num_videos_per_prompt,
             num_channels_latents,
             num_frames,
@@ -705,6 +761,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
             device,
             generator,
             latents,
+            x_velocity=0.1,
+            y_velocity=0.1,
         )
 
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
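For reference, here is a minimal sketch of how the patched pipeline might be run, assuming this commit's pipeline.py is importable locally as a drop-in replacement for diffusers' AnimateDiffPipeline; the checkpoint names are illustrative. In this commit the motion parameters are hardcoded at the call site (x_velocity=0.1, y_velocity=0.1) rather than exposed as __call__ arguments, so the calling code is unchanged:

    import torch
    from diffusers import MotionAdapter, DDIMScheduler
    from diffusers.utils import export_to_gif

    # assumption: the modified AnimateDiffPipeline from this commit's pipeline.py is on the path
    from pipeline import AnimateDiffPipeline

    # illustrative checkpoints; any AnimateDiff-compatible base model / motion adapter pair should work
    adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
    pipe = AnimateDiffPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", motion_adapter=adapter, torch_dtype=torch.float16
    )
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")

    # the per-frame drift is applied internally by prepare_motion_latents during __call__
    output = pipe(prompt="a rocket lifting off, cinematic", num_frames=16, num_inference_steps=25)
    export_to_gif(output.frames[0], "animation.gif")

Note that with x_velocity = y_velocity = 0.1 and integer-rounded shifts, the drift over 16 frames is at most one latent pixel, so larger velocities (or exposing them as __call__ arguments) may be needed to see pronounced motion.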