smoothieAI committed (verified)
Commit: be549e2
Parent(s): 2d17ea5

Update pipeline.py

Files changed (1): pipeline.py (+12 -8)
pipeline.py CHANGED

@@ -1015,7 +1015,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         latents: Optional[torch.FloatTensor] = None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-        ip_adapter_image: Optional[PipelineImageInput] = None,
+        ip_adapter_image: Optional[Union[PipelineImageInput, List[PipelineImageInput]]] = None,
         output_type: Optional[str] = "pil",
         output_path: Optional[str] = None,
         return_dict: bool = True,
@@ -1184,11 +1184,14 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
 
         if ip_adapter_image is not None:
             output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
-            image_embeds, negative_image_embeds = self.encode_image(
-                ip_adapter_image, device, num_videos_per_prompt, output_hidden_state
-            )
-            if do_classifier_free_guidance:
-                image_embeds = torch.cat([negative_image_embeds, image_embeds])
+            # encode each IP-Adapter reference image separately
+            image_embeds = []
+            # if ip_adapter_image is not a list, wrap it in one
+            ip_adapter_image = [ip_adapter_image] if not isinstance(ip_adapter_image, list) else ip_adapter_image
+            for image in ip_adapter_image:
+                single_image_embeds, single_negative_image_embeds = self.encode_image(image, device, num_videos_per_prompt, output_hidden_state)
+                if do_classifier_free_guidance: single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
+                image_embeds.append(single_image_embeds)
 
         if self.controlnet != None:
             if isinstance(controlnet, ControlNetModel):
@@ -1352,8 +1355,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
-        # 7 Add image embeds for IP-Adapter
-        added_cond_kwargs = {"image_embeds": image_embeds} if ip_adapter_image is not None else None
 
         # 7.1 Create tensor stating which controlnets to keep
         if self.controlnet != None:
@@ -1430,6 +1431,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 print("avg", torch.mean(prompt_embeds[current_prompt_index][0]))
                 print("max", torch.max(prompt_embeds[current_prompt_index][0]))
 
+                # 7. Add image embeds for IP-Adapter, clamping the prompt index to the embedding list
+                added_cond_kwargs = {"image_embeds": image_embeds[min(current_prompt_index, len(image_embeds) - 1)]} if ip_adapter_image is not None else None
+
                 if self.controlnet != None and i < int(control_end*num_inference_steps):
 
                     current_context_conditioning_frames = conditioning_frames[current_context_indexes, :, :, :]
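For reference, a minimal sketch of how the new list-valued argument might be used, followed by a runnable illustration of the index-clamping rule added in the last hunk. The pipeline object `pipe` and the image variables are assumptions for illustration only; nothing about them is defined in this commit, only the `ip_adapter_image` semantics are.

    # Hypothetical call; assumes `pipe` is an instance of this repo's
    # AnimateDiffPipeline with IP-Adapter weights already loaded.
    # A bare image still works: the pipeline wraps it into a one-element list.
    # video = pipe(prompt="...", ip_adapter_image=[image_a, image_b])

    # Runnable demo of the selection rule used per denoising step:
    #   image_embeds[min(current_prompt_index, len(image_embeds) - 1)]
    # The last embedding is reused once the prompt index runs past the list.
    image_embeds = ["embeds_a", "embeds_b"]  # stand-ins for encoded image embeddings
    for current_prompt_index in range(4):
        print(current_prompt_index, image_embeds[min(current_prompt_index, len(image_embeds) - 1)])
    # 0 embeds_a / 1 embeds_b / 2 embeds_b / 3 embeds_b

Because of this clamping, supplying fewer reference images than prompt segments degrades gracefully (the final image conditions all remaining segments) rather than raising an IndexError.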