smoothieAI committed on
Commit 93c3c38 · verified · 1 Parent(s): b06bdc3

Update pipeline.py

Files changed (1): pipeline.py +7 -13
pipeline.py CHANGED
@@ -1139,8 +1139,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         else:
             batch_size = 1
 
-        # print promtp embed shape
-        print(prompt_embeds.shape)
 
         device = self._execution_device
 
@@ -1176,9 +1174,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
             lora_scale=text_encoder_lora_scale,
             clip_skip=clip_skip,
         )
-        # print promtp embed shape
-        print("prompt_embeds shape after encoding")
-        print(prompt_embeds.shape)
 
         # For classifier free guidance, we need to do two forward passes.
         # Here we concatenate the unconditional and text embeddings into a single batch
@@ -1187,9 +1182,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
             # concatenate negative prompt embeddings with prompt embeddings on a new dimension after the first batch dimension
             prompt_embeds = torch.stack([negative_prompt_embeds, prompt_embeds], dim=1)
 
-            print("prompt_embeds shape after stacking")
-            print(prompt_embeds.shape)
-
         if ip_adapter_image is not None:
             output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
             image_embeds, negative_image_embeds = self.encode_image(
@@ -1433,6 +1425,12 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 # get the current prompt index based on the current context position (for blending between multiple prompts)
                 context_position = current_context_indexes[0] % context_size
                 current_prompt_index = int(context_position / (context_size / num_prompts))
+
+                print("current_prompt_index", current_prompt_index)
+                print("current prompt embed shape", prompt_embeds[current_prompt_index].shape)
+                # print min and max values of the current prompt embed
+                print("min", torch.min(prompt_embeds[current_prompt_index]))
+                print("max", torch.max(prompt_embeds[current_prompt_index]))
 
                 if self.controlnet != None and i < int(control_end*num_inference_steps):
 
@@ -1467,7 +1465,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     down_block_res_samples, mid_block_res_sample = self.controlnet(
                         control_model_input,
                         t,
-                        encoder_hidden_states=controlnet_prompt_embeds,
+                        encoder_hidden_states=controlnet_prompt_embeds[current_prompt_index],
                         controlnet_cond=current_context_conditioning_frames,
                         conditioning_scale=cond_scale,
                         guess_mode=guess_mode,
@@ -1486,10 +1484,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     ).sample
 
                 else:
-                    # predict the noise residual without contorlnet
-                    # print current context embeding shape
-                    print("current context embeding shape")
-                    print(prompt_embeds[current_prompt_index].shape)
                     noise_pred = self.unet(
                         latent_model_input,
                         t,
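To make the intent of the change easier to follow outside the diff, here is a minimal, self-contained sketch of the prompt-selection logic it touches. The values for `num_prompts`, `context_size`, the embedding shape, and `current_context_indexes` are hypothetical stand-ins (not taken from the commit), and `prompt_embeds` here stands in for the `controlnet_prompt_embeds` tensor that the commit now indexes:

```python
import torch

# Hypothetical setup: two prompts blended across the video, 16-frame context
# windows, and embeddings shaped [num_prompts, 2 (uncond/cond), 77 tokens, 768],
# matching the torch.stack([negative_prompt_embeds, prompt_embeds], dim=1)
# performed earlier in the pipeline.
num_prompts = 2
context_size = 16
prompt_embeds = torch.randn(num_prompts, 2, 77, 768)

# Hypothetical sliding window of frame indexes for the current denoising pass.
current_context_indexes = list(range(24, 24 + context_size))

# Same arithmetic as the pipeline: map the window's position within one
# context cycle onto the list of prompts.
context_position = current_context_indexes[0] % context_size
current_prompt_index = int(context_position / (context_size / num_prompts))

# The substantive change in this commit: pass the ControlNet only the slice
# for the current window's prompt, not the whole stacked tensor.
encoder_hidden_states = prompt_embeds[current_prompt_index]  # shape: [2, 77, 768]

print(current_prompt_index, encoder_hidden_states.shape)
```

The one-line fix follows from the stacking earlier in the pipeline: after `torch.stack([negative_prompt_embeds, prompt_embeds], dim=1)` the embeddings carry a leading per-prompt dimension, so the ControlNet call should receive only the slice selected by `current_prompt_index` for the current context window rather than the full stacked tensor. The remaining hunks just remove the temporary shape-debugging `print` calls and add new ones around the prompt-index computation.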