smoothieAI
/

pipeline_animatediff_context

Model card Files Files and versions

xet

Community

smoothieAI committed on Jan 16, 2024

Commit

7bb34ef

verified ·

1 Parent(s): dd283e2

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +0 -13

pipeline.py CHANGED Viewed

@@ -956,20 +956,16 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         # divide the initial latents into context groups
         num_context_groups = num_frames // (context_size-overlap)
-        print(f"Num context groups: {num_context_groups}")
         # Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=len(timesteps)) as progress_bar:
             for i, t in enumerate(timesteps):
-                print(f"Step: {i}")
-                print(f"Timestep: {t}")
                 latent_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
                 latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
                 # for each context group, separately denoise the current timestep
                 for context_group in range(num_context_groups):
-                    print(f"Context group: {context_group}")
                     # calculate the current indexes, considering overlap
                     if context_group == 0:current_context_start = 0
                     else:current_context_start = context_group * (context_size - overlap)
@@ -978,7 +974,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     current_context_latents = latents[:, :, current_context_start : current_context_start + context_size, :, :]
                     wrap_count = max(current_context_start + context_size - num_frames, 0)
-                    print(f"Wrap count: {wrap_count}")
                     # if context_start + context_size > num_frames: append the remaining frames from the start of the latents
                     if wrap_count > 0:
@@ -1009,14 +1004,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     # compute the previous noisy sample x_t -> x_t-1
                     current_context_latents = self.scheduler.step(noise_pred, t, current_context_latents, **extra_step_kwargs).prev_sample
-                    # remove the appended frames from the end of the current_context_latents
-                    # if wrap_count > 0:
-                    #     # remove the ending frames from current_context_latents
-                    #     current_context_latents = current_context_latents[:, :, :-wrap_count, :, :]
-                    #     # remove the ending frames from noise_pred
-                    #     noise_pred = noise_pred[:, :, :-wrap_count, :, :]
-                    #     # print the shape of the current_context_latents and noise_pred
                     # if context_start + context_size > num_frames: remove the appended frames from the end of the current_context_latents
                     if wrap_count > 0:
                         # add the ending frames from current_context_latents to the start of the latent_sum

         # divide the initial latents into context groups
         num_context_groups = num_frames // (context_size-overlap)
         # Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=len(timesteps)) as progress_bar:
             for i, t in enumerate(timesteps):
                 latent_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
                 latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
                 # for each context group, separately denoise the current timestep
                 for context_group in range(num_context_groups):
                     # calculate the current indexes, considering overlap
                     if context_group == 0:current_context_start = 0
                     else:current_context_start = context_group * (context_size - overlap)
                     current_context_latents = latents[:, :, current_context_start : current_context_start + context_size, :, :]
                     wrap_count = max(current_context_start + context_size - num_frames, 0)
                     # if context_start + context_size > num_frames: append the remaining frames from the start of the latents
                     if wrap_count > 0:
                     # compute the previous noisy sample x_t -> x_t-1
                     current_context_latents = self.scheduler.step(noise_pred, t, current_context_latents, **extra_step_kwargs).prev_sample
                     # if context_start + context_size > num_frames: remove the appended frames from the end of the current_context_latents
                     if wrap_count > 0:
                         # add the ending frames from current_context_latents to the start of the latent_sum