Update pipeline.py
Browse files- pipeline.py +14 -23
pipeline.py
CHANGED
@@ -1044,30 +1044,24 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1044 |
added_cond_kwargs=added_cond_kwargs,
|
1045 |
).sample
|
1046 |
|
1047 |
-
#
|
1048 |
if do_classifier_free_guidance:
|
1049 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
1050 |
-
|
1051 |
-
|
1052 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1053 |
# set the step index to the current batch
|
1054 |
self.scheduler._step_index = i
|
1055 |
|
1056 |
-
# if context_start + context_size > num_frames: remove the appended frames from the end of the current_context_latents
|
1057 |
-
# if wrap_count > 0:
|
1058 |
-
# # add the ending frames from current_context_latents to the start of the latent_sum
|
1059 |
-
# latent_sum[:, :, 0:wrap_count, :, :] += current_context_latents[:, :, -wrap_count:, :, :]
|
1060 |
-
# # increase the counter for the ending frames
|
1061 |
-
# latent_counter[0:wrap_count] += 1
|
1062 |
-
# # remove the ending frames from current_context_latents
|
1063 |
-
# current_context_latents = current_context_latents[:, :, :-wrap_count, :, :]
|
1064 |
-
|
1065 |
-
# add the current_context_latents back into the latent sum, starting at the current context start
|
1066 |
-
# latent_sum[:, :, current_context_start : current_context_start + context_size, :, :] += current_context_latents
|
1067 |
-
|
1068 |
-
# add one to the counter for each timestep in the context
|
1069 |
-
latent_counter[current_context_start : current_context_start + context_size] += 1
|
1070 |
-
|
1071 |
# perform guidance
|
1072 |
if do_classifier_free_guidance:
|
1073 |
latent_counter = latent_counter.reshape(1, 1, num_frames, 1, 1)
|
@@ -1083,12 +1077,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1083 |
progress_bar.update()
|
1084 |
if callback is not None and i % callback_steps == 0:
|
1085 |
callback(i, t, None)
|
1086 |
-
|
1087 |
-
|
1088 |
-
# latents = latent_sum / latent_counter
|
1089 |
|
1090 |
# shuffle rotate latent images by step places, wrapping around the last 2 to the start
|
1091 |
-
latents = torch.cat([
|
1092 |
|
1093 |
if output_type == "latent":
|
1094 |
return AnimateDiffPipelineOutput(frames=latents)
|
|
|
1044 |
added_cond_kwargs=added_cond_kwargs,
|
1045 |
).sample
|
1046 |
|
1047 |
+
# sum the noise predictions for the unconditional and text-conditioned noise
|
1048 |
if do_classifier_free_guidance:
|
1049 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
1050 |
+
if wrap_count > 0:
|
1051 |
+
# add the ending frames from noise_pred_uncond to the start of the noise_pred_uncond_sum
|
1052 |
+
noise_pred_uncond_sum[:, :, 0:wrap_count, :, :] += noise_pred_uncond[:, :, -wrap_count:, :, :]
|
1053 |
+
noise_pred_text_sum[:, :, 0:wrap_count, :, :] += noise_pred_text[:, :, -wrap_count:, :, :]
|
1054 |
+
# increase the counter for the ending frames
|
1055 |
+
latent_counter[0:wrap_count] += 1
|
1056 |
+
# remove the ending frames from noise_pred_uncond
|
1057 |
+
noise_pred_uncond = noise_pred_uncond[:, :, :-wrap_count, :, :]
|
1058 |
+
noise_pred_text = noise_pred_text[:, :, :-wrap_count, :, :]
|
1059 |
+
noise_pred_uncond_sum[:, :, current_context_start : current_context_start + context_size, :, :] += noise_pred_uncond
|
1060 |
+
noise_pred_text_sum[:, :, current_context_start : current_context_start + context_size, :, :] += noise_pred_text
|
1061 |
+
|
1062 |
# set the step index to the current batch
|
1063 |
self.scheduler._step_index = i
|
1064 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1065 |
# perform guidance
|
1066 |
if do_classifier_free_guidance:
|
1067 |
latent_counter = latent_counter.reshape(1, 1, num_frames, 1, 1)
|
|
|
1077 |
progress_bar.update()
|
1078 |
if callback is not None and i % callback_steps == 0:
|
1079 |
callback(i, t, None)
|
|
|
|
|
|
|
1080 |
|
1081 |
# shuffle rotate latent images by step places, wrapping around the last 2 to the start
|
1082 |
+
latents = torch.cat([current_context_latents[:, :, -step:, :, :], current_context_latents[:, :, :-step, :, :]], dim=2)
|
1083 |
|
1084 |
if output_type == "latent":
|
1085 |
return AnimateDiffPipelineOutput(frames=latents)
|