Update pipeline.py
Browse files- pipeline.py +7 -3
pipeline.py
CHANGED
@@ -843,6 +843,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
843 |
):
|
844 |
image = self.control_image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
|
845 |
image_batch_size = image.shape[0]
|
|
|
846 |
|
847 |
if image_batch_size == 1:
|
848 |
repeat_by = batch_size
|
@@ -856,6 +857,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
856 |
|
857 |
if do_classifier_free_guidance and not guess_mode:
|
858 |
image = torch.cat([image] * 2)
|
|
|
|
|
859 |
|
860 |
return image
|
861 |
|
@@ -1266,6 +1269,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1266 |
else:
|
1267 |
# select the relevant context from the conditioning frames of shape (frame_number, channel, height, width)
|
1268 |
current_context_conditioning_frames = conditioning_frames[current_context_indexes, :, :, :]
|
|
|
1269 |
else:
|
1270 |
current_context_conditioning_frames = None
|
1271 |
|
@@ -1300,9 +1304,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1300 |
)
|
1301 |
|
1302 |
down_block_res_samples, mid_block_res_sample = self.controlnet(
|
1303 |
-
control_model_input
|
1304 |
-
t
|
1305 |
-
encoder_hidden_states=controlnet_prompt_embeds
|
1306 |
controlnet_cond=current_context_conditioning_frames,
|
1307 |
conditioning_scale=cond_scale,
|
1308 |
guess_mode=guess_mode,
|
|
|
843 |
):
|
844 |
image = self.control_image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
|
845 |
image_batch_size = image.shape[0]
|
846 |
+
print("prepared control image_batch_size", image_batch_size)
|
847 |
|
848 |
if image_batch_size == 1:
|
849 |
repeat_by = batch_size
|
|
|
857 |
|
858 |
if do_classifier_free_guidance and not guess_mode:
|
859 |
image = torch.cat([image] * 2)
|
860 |
+
|
861 |
+
print("prepared control image_batch_size", image.shape)
|
862 |
|
863 |
return image
|
864 |
|
|
|
1269 |
else:
|
1270 |
# select the relevant context from the conditioning frames of shape (frame_number, channel, height, width)
|
1271 |
current_context_conditioning_frames = conditioning_frames[current_context_indexes, :, :, :]
|
1272 |
+
current_context_conditioning_frames = current_context_conditioning_frames.to(device)
|
1273 |
else:
|
1274 |
current_context_conditioning_frames = None
|
1275 |
|
|
|
1304 |
)
|
1305 |
|
1306 |
down_block_res_samples, mid_block_res_sample = self.controlnet(
|
1307 |
+
control_model_input,
|
1308 |
+
t,
|
1309 |
+
encoder_hidden_states=controlnet_prompt_embeds,
|
1310 |
controlnet_cond=current_context_conditioning_frames,
|
1311 |
conditioning_scale=cond_scale,
|
1312 |
guess_mode=guess_mode,
|