Update pipeline.py
pipeline.py (+5 -6)
@@ -1426,6 +1426,10 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([current_context_latents] * 2) if do_classifier_free_guidance else current_context_latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+                # get the current prompt index based on the current context position (for blending between multiple prompts)
+                context_position = current_context_indexes[0] % context_size
+                current_prompt_index = int(context_position / (context_size / num_prompts))
 
                 if self.controlnet != None and i < int(control_end*num_inference_steps):
 
@@ -1456,11 +1460,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     control_model_input = control_model_input.reshape(
                         (-1, control_model_input.shape[2], control_model_input.shape[3], control_model_input.shape[4])
                     )
-
-
-                    # get the current prompt index based on the current context position (for blending between multiple prompts)
-                    context_position = current_context_indexes[0] % context_size
-                    current_prompt_index = int(context_position / (context_size / num_prompts))
 
                     down_block_res_samples, mid_block_res_sample = self.controlnet(
                         control_model_input,
@@ -1488,7 +1487,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 noise_pred = self.unet(
                     latent_model_input,
                     t,
-                    encoder_hidden_states=prompt_embeds,
+                    encoder_hidden_states=prompt_embeds[current_prompt_index],
                     cross_attention_kwargs=cross_attention_kwargs,
                     added_cond_kwargs=added_cond_kwargs,
                 ).sample
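For readers skimming the diff, here is a minimal standalone sketch of the prompt-index arithmetic that this commit hoists out of the ControlNet-only branch. The variable names (context_size, num_prompts, current_context_indexes) follow the diff; the helper function and the example harness are assumptions added purely for illustration, not part of the pipeline.

# Minimal sketch (assumed harness, not part of the pipeline) of the prompt
# blending index used above: each context window picks one of the encoded
# prompts based on where the window starts within the context cycle.
def pick_prompt_index(current_context_indexes, context_size, num_prompts):
    # position of the window's first frame within one context cycle
    context_position = current_context_indexes[0] % context_size
    # map that position onto one of the num_prompts embeddings
    return int(context_position / (context_size / num_prompts))

# Example: with context_size=16 and num_prompts=4, window starts 0-3 use
# prompt 0, 4-7 use prompt 1, 8-11 use prompt 2, 12-15 use prompt 3.
for start in range(0, 16, 4):
    print(start, pick_prompt_index([start], context_size=16, num_prompts=4))
# -> (0, 0), (4, 1), (8, 2), (12, 3)

The hoist in the first hunk matters because the last hunk makes the UNet call consume prompt_embeds[current_prompt_index] on every step, including steps after control_end or runs without a ControlNet, where the branch that previously computed the index is skipped.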