smoothieAI committed on
Commit
d51def2
·
verified ·
1 Parent(s): c05212b

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +32 -11
pipeline.py CHANGED
@@ -812,7 +812,19 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
812
  latents = latents.to(device)
813
  return latents, init_latents
814
 
815
-
 
 
 
 
 
 
 
 
 
 
 
 
816
  @torch.no_grad()
817
  # @replace_example_docstring(EXAMPLE_DOC_STRING)
818
  def __call__(
@@ -1005,16 +1017,25 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
1005
 
1006
  if self.controlnet != None:
1007
  if isinstance(controlnet, ControlNetModel):
1008
- conditioning_frames = self.prepare_image(
1009
- image=conditioning_frames,
1010
- width=width,
1011
- height=height,
1012
- batch_size=batch_size * num_videos_per_prompt * num_frames,
1013
- num_images_per_prompt=num_videos_per_prompt,
1014
- device=device,
1015
- dtype=controlnet.dtype,
1016
- do_classifier_free_guidance=self.do_classifier_free_guidance,
1017
- guess_mode=guess_mode,
 
 
 
 
 
 
 
 
 
1018
  )
1019
  elif isinstance(controlnet, MultiControlNetModel):
1020
  cond_prepared_frames = []
 
812
  latents = latents.to(device)
813
  return latents, init_latents
814
 
815
def prepare_control_latents(self, batch_size, control_frames, num_channels_latents, num_frames, height, width, dtype, device):
    """Allocate an all-zero latents tensor for ControlNet conditioning frames.

    Note: ``batch_size`` and ``control_frames`` are currently UNUSED — the
    returned tensor is a zero placeholder and does not encode the input
    frames. ``control_frames`` fixes the ``contorl_frames`` typo in the
    original signature; the visible call site passes arguments positionally,
    so the rename is safe for it.

    NOTE(review): the visible call site passes only 7 positional arguments
    (starting with ``num_frames`` in the ``batch_size`` slot) to this
    8-parameter method, which would raise a TypeError — confirm and align
    the caller.

    Args:
        batch_size: unused placeholder.
        control_frames: unused placeholder for the input control frames.
        num_channels_latents: channel dimension of the returned latents.
        num_frames: number of video frames (leading dimension).
        height: frame height in pixels; divided by ``self.vae_scale_factor``.
        width: frame width in pixels; divided by ``self.vae_scale_factor``.
        dtype: dtype of the returned tensor.
        device: device of the returned tensor.

    Returns:
        ``torch.Tensor`` of zeros with shape
        ``(num_frames, num_channels_latents, height // self.vae_scale_factor,
        width // self.vae_scale_factor)``.
    """
    shape = (
        num_frames,
        num_channels_latents,
        height // self.vae_scale_factor,
        width // self.vae_scale_factor,
    )

    # Placeholder: zeros rather than VAE-encoded control frames — the
    # comment in the original ("convert input control image array to
    # latents") describes intent, not current behavior.
    latents = torch.zeros(shape, dtype=dtype, device=device)

    return latents
827
+
828
  @torch.no_grad()
829
  # @replace_example_docstring(EXAMPLE_DOC_STRING)
830
  def __call__(
 
1017
 
1018
  if self.controlnet != None:
1019
  if isinstance(controlnet, ControlNetModel):
1020
+ # conditioning_frames = self.prepare_image(
1021
+ # image=conditioning_frames,
1022
+ # width=width,
1023
+ # height=height,
1024
+ # batch_size=batch_size * num_videos_per_prompt * num_frames,
1025
+ # num_images_per_prompt=num_videos_per_prompt,
1026
+ # device=device,
1027
+ # dtype=controlnet.dtype,
1028
+ # do_classifier_free_guidance=self.do_classifier_free_guidance,
1029
+ # guess_mode=guess_mode,
1030
+ # )
1031
+ conditioning_frames = self.prepare_control_latents(
1032
+ num_frames,
1033
+ conditioning_frames,
1034
+ num_channels_latents,
1035
+ height,
1036
+ width,
1037
+ prompt_embeds.dtype,
1038
+ device,
1039
  )
1040
  elif isinstance(controlnet, MultiControlNetModel):
1041
  cond_prepared_frames = []