Update pipeline.py
Browse files- pipeline.py +51 -7
pipeline.py
CHANGED
@@ -540,6 +540,36 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
540 |
# scale the initial noise by the standard deviation required by the scheduler
|
541 |
latents = latents * self.scheduler.init_noise_sigma
|
542 |
return latents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
543 |
|
544 |
def prepare_latents_consistent(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None,smooth_weight=0.5,smooth_steps=3):
|
545 |
shape = (
|
@@ -954,6 +984,20 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
954 |
generator,
|
955 |
latents,
|
956 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
957 |
elif(latent_mode == "motion"):
|
958 |
latents = self.prepare_motion_latents(
|
959 |
batch_size * num_videos_per_prompt,
|
@@ -1022,15 +1066,15 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1022 |
# Iterate over each index in the context group
|
1023 |
local_context_size = context_size
|
1024 |
if timestep <= 1:
|
1025 |
-
local_context_size = context_size *
|
1026 |
for index in range(local_context_size):
|
1027 |
# if its the first timestep, spread the indexes out evenly over the full frame range, offset by the group index
|
1028 |
-
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
|
1033 |
-
|
1034 |
# If frame index exceeds total frames, wrap around
|
1035 |
if frame_index >= total_frames:
|
1036 |
frame_index %= total_frames
|
|
|
540 |
# scale the initial noise by the standard deviation required by the scheduler
|
541 |
latents = latents * self.scheduler.init_noise_sigma
|
542 |
return latents
|
543 |
+
|
544 |
+
def prepare_latents_same_start(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None, context_size=16, blend_frames=4):
    """Prepare initial video latents where every context-window start frame shares frame 0's noise.

    Frames at indices ``i * (context_size - blend_frames)`` are overwritten with
    the noise of frame 0, so each sliding context window begins from identical
    noise — the comment in the original reads "make every
    (context_size-blend_frames) frames have the same noise".

    Args:
        batch_size: Number of latent samples to generate.
        num_channels_latents: Channel count of the latent tensor.
        num_frames: Number of video frames in the latent.
        height: Target image height in pixels (divided by ``self.vae_scale_factor``).
        width: Target image width in pixels (divided by ``self.vae_scale_factor``).
        dtype: dtype used when sampling fresh noise.
        device: Device the latents are created on / moved to.
        generator: ``torch.Generator`` or a list of per-sample generators.
        latents: Optional pre-made latents; when given they are only moved to
            ``device`` (dtype is left unchanged, matching the sibling
            ``prepare_latents`` behavior visible in this file).
        context_size: Sliding-window length in frames.
        blend_frames: Overlap between consecutive windows.

    Returns:
        Latents scaled by ``self.scheduler.init_noise_sigma``.

    Raises:
        ValueError: If a generator list length does not match ``batch_size``,
            or if ``blend_frames >= context_size`` (window stride would be <= 0;
            the original code raised a bare ``ZeroDivisionError`` here).
    """
    shape = (
        batch_size,
        num_channels_latents,
        num_frames,
        height // self.vae_scale_factor,
        width // self.vae_scale_factor,
    )
    if isinstance(generator, list) and len(generator) != batch_size:
        raise ValueError(
            f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
            f" size of {batch_size}. Make sure the batch size matches the length of the generators."
        )

    if latents is None:
        latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
    else:
        latents = latents.to(device)

    # Stride between consecutive window start frames. Guard against a
    # non-positive stride, which previously caused ZeroDivisionError below.
    loop_size = context_size - blend_frames
    if loop_size <= 0:
        raise ValueError(
            f"context_size ({context_size}) must be greater than blend_frames ({blend_frames})."
        )

    # Copy frame 0's noise onto the start frame of every subsequent window.
    # Start at i = 1: the original's i = 0 iteration assigned frame 0 to
    # itself, a no-op.
    loop_count = num_frames // loop_size
    for i in range(1, loop_count):
        latents[:, :, i * loop_size, :, :] = latents[:, :, 0, :, :]

    # scale the initial noise by the standard deviation required by the scheduler
    latents = latents * self.scheduler.init_noise_sigma
    return latents
|
573 |
|
574 |
def prepare_latents_consistent(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None,smooth_weight=0.5,smooth_steps=3):
|
575 |
shape = (
|
|
|
984 |
generator,
|
985 |
latents,
|
986 |
)
|
987 |
+
if(latent_mode == "same_start"):
|
988 |
+
latents = self.prepare_latents_same_start(
|
989 |
+
batch_size * num_videos_per_prompt,
|
990 |
+
num_channels_latents,
|
991 |
+
num_frames,
|
992 |
+
height,
|
993 |
+
width,
|
994 |
+
prompt_embeds.dtype,
|
995 |
+
device,
|
996 |
+
generator,
|
997 |
+
latents,
|
998 |
+
context_size=context_size,
|
999 |
+
blend_frames=overlap,
|
1000 |
+
)
|
1001 |
elif(latent_mode == "motion"):
|
1002 |
latents = self.prepare_motion_latents(
|
1003 |
batch_size * num_videos_per_prompt,
|
|
|
1066 |
# Iterate over each index in the context group
|
1067 |
local_context_size = context_size
|
1068 |
if timestep <= 1:
|
1069 |
+
local_context_size = context_size * 1.5
|
1070 |
for index in range(local_context_size):
|
1071 |
# if its the first timestep, spread the indexes out evenly over the full frame range, offset by the group index
|
1072 |
+
if timestep <= 1:
|
1073 |
+
step_size = 2
|
1074 |
+
# make the context group stretch
|
1075 |
+
else:
|
1076 |
+
# Calculate the frame index
|
1077 |
+
frame_index = (group_index * (local_context_size - overlap)) + (offset * timestep) + index
|
1078 |
# If frame index exceeds total frames, wrap around
|
1079 |
if frame_index >= total_frames:
|
1080 |
frame_index %= total_frames
|