smoothieAI committed
Commit b04efd0 · verified · 1 parent: e3b7e33

Update pipeline.py

Files changed (1)
pipeline.py +56 -10
pipeline.py CHANGED
@@ -825,6 +825,37 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
 
         return latents
 
+    # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.prepare_image
+    def prepare_control_frames(
+        self,
+        image,
+        width,
+        height,
+        batch_size,
+        num_images_per_prompt,
+        device,
+        dtype,
+        do_classifier_free_guidance=False,
+        guess_mode=False,
+    ):
+        image = self.control_image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
+        image_batch_size = image.shape[0]
+
+        if image_batch_size == 1:
+            repeat_by = batch_size
+        else:
+            # image batch size is the same as prompt batch size
+            repeat_by = num_images_per_prompt
+
+        image = image.repeat_interleave(repeat_by, dim=0)
+
+        image = image.to(device=device, dtype=dtype)
+
+        if do_classifier_free_guidance and not guess_mode:
+            image = torch.cat([image] * 2)
+
+        return image
+
     @torch.no_grad()
     # @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -1028,19 +1059,34 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 # do_classifier_free_guidance=self.do_classifier_free_guidance,
                 # guess_mode=guess_mode,
                 # )
-                conditioning_frames = self.prepare_control_latents(
-                    num_frames,
-                    conditioning_frames,
-                    num_channels_latents,
-                    height,
-                    width,
-                    prompt_embeds.dtype,
-                    device,
+                conditioning_frames = self.prepare_control_frames(
+                    image=frame_,
+                    width=width,
+                    height=height,
+                    batch_size=batch_size * num_videos_per_prompt * num_frames,
+                    num_images_per_prompt=num_videos_per_prompt,
+                    device=device,
+                    dtype=controlnet.dtype,
+                    do_classifier_free_guidance=self.do_classifier_free_guidance,
+                    guess_mode=guess_mode,
                 )
+
             elif isinstance(controlnet, MultiControlNetModel):
                 cond_prepared_frames = []
                 for frame_ in conditioning_frames:
-                    prepared_frame = self.prepare_image(
+                    # prepared_frame = self.prepare_image(
+                    #     image=frame_,
+                    #     width=width,
+                    #     height=height,
+                    #     batch_size=batch_size * num_videos_per_prompt * num_frames,
+                    #     num_images_per_prompt=num_videos_per_prompt,
+                    #     device=device,
+                    #     dtype=controlnet.dtype,
+                    #     do_classifier_free_guidance=self.do_classifier_free_guidance,
+                    #     guess_mode=guess_mode,
+                    # )
+
+                    prepared_frame = self.prepare_control_frames(
                         image=frame_,
                         width=width,
                         height=height,
@@ -1051,7 +1097,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                         do_classifier_free_guidance=self.do_classifier_free_guidance,
                         guess_mode=guess_mode,
                     )
-
+
                     cond_prepared_frames.append(prepared_frame)
 
             conditioning_frames = cond_prepared_frames
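
For context, the sketch below reproduces the batch handling that the new prepare_control_frames helper applies, as a self-contained script. It is an illustration, not the pipeline's actual method: the real code first runs the frames through self.control_image_processor.preprocess(image, height=height, width=width), which is replaced here by a pre-shaped dummy tensor, and the name prepare_control_frames_sketch and the example sizes are hypothetical. Note that the call sites in the diff pass batch_size=batch_size * num_videos_per_prompt * num_frames, so a single conditioning frame is tiled across every latent frame, while a full stack of frames is only repeated per prompt.

import torch


def prepare_control_frames_sketch(
    image: torch.Tensor,          # preprocessed frames, shape (F, C, H, W)
    batch_size: int,              # batch * num_videos_per_prompt * num_frames
    num_images_per_prompt: int,
    device: torch.device,
    dtype: torch.dtype,
    do_classifier_free_guidance: bool = False,
    guess_mode: bool = False,
) -> torch.Tensor:
    # The real helper preprocesses PIL/array input at this point; the
    # sketch assumes `image` is already a float tensor of stacked frames.
    image = image.to(dtype=torch.float32)
    image_batch_size = image.shape[0]

    # A single conditioning frame is tiled out to the full batch;
    # a full stack is repeated once per image per prompt.
    repeat_by = batch_size if image_batch_size == 1 else num_images_per_prompt
    image = image.repeat_interleave(repeat_by, dim=0)

    image = image.to(device=device, dtype=dtype)

    # Classifier-free guidance runs an unconditional and a conditional
    # pass, so the control frames are duplicated along the batch axis
    # (in guess mode the ControlNet only sees the conditional half).
    if do_classifier_free_guidance and not guess_mode:
        image = torch.cat([image] * 2)

    return image


# Shape check with hypothetical sizes: 16 frames, one video per prompt.
frames = torch.rand(16, 3, 64, 64)
out = prepare_control_frames_sketch(
    frames,
    batch_size=1 * 1 * 16,
    num_images_per_prompt=1,
    device=torch.device("cpu"),
    dtype=torch.float16,
    do_classifier_free_guidance=True,
)
print(out.shape)  # torch.Size([32, 3, 64, 64]); doubled for CFG

With 16 input frames the batch is left at 16 by repeat_interleave (repeat_by is 1) and then doubled to 32 by the CFG duplication, which matches what the ControlNet expects to receive per denoising step.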