smoothieAI committed (verified)
Commit f34f3f1 · Parent(s): 5cdd2d4

Update pipeline.py

Files changed (1)
  1. pipeline.py +51 -7
pipeline.py CHANGED
@@ -540,6 +540,36 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         # scale the initial noise by the standard deviation required by the scheduler
         latents = latents * self.scheduler.init_noise_sigma
         return latents
+
+    def prepare_latents_same_start(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None, context_size=16, blend_frames=4):
+        shape = (
+            batch_size,
+            num_channels_latents,
+            num_frames,
+            height // self.vae_scale_factor,
+            width // self.vae_scale_factor,
+        )
+        if isinstance(generator, list) and len(generator) != batch_size:
+            raise ValueError(
+                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+            )
+
+        if latents is None:
+            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+        else:
+            latents = latents.to(device)
+
+        # make every (context_size-blend_frames) frames have the same noise
+        loop_size = context_size - blend_frames
+        loop_count = num_frames // loop_size
+        for i in range(loop_count):
+            # repeat the first frame's noise at frame i*loop_size
+            latents[:, :, i*loop_size, :, :] = latents[:, :, 0, :, :]
+
+        # scale the initial noise by the standard deviation required by the scheduler
+        latents = latents * self.scheduler.init_noise_sigma
+        return latents
 
     def prepare_latents_consistent(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None,smooth_weight=0.5,smooth_steps=3):
         shape = (
@@ -954,6 +984,20 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 generator,
                 latents,
             )
+            if(latent_mode == "same_start"):
+                latents = self.prepare_latents_same_start(
+                    batch_size * num_videos_per_prompt,
+                    num_channels_latents,
+                    num_frames,
+                    height,
+                    width,
+                    prompt_embeds.dtype,
+                    device,
+                    generator,
+                    latents,
+                    context_size=context_size,
+                    blend_frames=overlap,
+                )
             elif(latent_mode == "motion"):
                 latents = self.prepare_motion_latents(
                     batch_size * num_videos_per_prompt,
@@ -1022,15 +1066,15 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     # Iterate over each index in the context group
                     local_context_size = context_size
                     if timestep <= 1:
-                        local_context_size = context_size * 2
+                        local_context_size = context_size * 1.5
                     for index in range(local_context_size):
                         # if it's the first timestep, spread the indexes out evenly over the full frame range, offset by the group index
-                        # if timestep <= 1:
-                        #     step_size = (total_frames // context_size)+2
-                        #     frame_index = ((index * step_size)+group_index)+timestep
-                        # else:
-                        #     # Calculate the frame index
-                        frame_index = (group_index * (local_context_size - overlap)) + (offset * timestep) + index
+                        if timestep <= 1:
+                            step_size = 2
+                            # make the context group stretch
+                        else:
+                            # Calculate the frame index
+                            frame_index = (group_index * (local_context_size - overlap)) + (offset * timestep) + index
                         # If frame index exceeds total frames, wrap around
                         if frame_index >= total_frames:
                             frame_index %= total_frames
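
For reference, the sketch below isolates the noise-repetition pattern that the new prepare_latents_same_start method implements: every loop_size = context_size - blend_frames frames, the latent noise of frame 0 is copied in, so each context window starts from the same noise. The helper name same_start_noise_sketch and the batch/channel/spatial sizes are illustrative only and not part of the commit; the frame loop mirrors the diff above.

import torch

def same_start_noise_sketch(num_frames=32, context_size=16, blend_frames=4):
    # Latent layout matches the shape built in the diff: (batch, channels, frames, height, width).
    latents = torch.randn(1, 4, num_frames, 8, 8)

    # Every (context_size - blend_frames) frames, reuse the noise of frame 0,
    # mirroring the loop in prepare_latents_same_start.
    loop_size = context_size - blend_frames   # 12 with the method's defaults
    loop_count = num_frames // loop_size      # 2 for 32 frames
    for i in range(loop_count):
        latents[:, :, i * loop_size, :, :] = latents[:, :, 0, :, :]

    repeated = [i * loop_size for i in range(loop_count)]
    return latents, repeated

_, repeated_frames = same_start_noise_sketch()
print(repeated_frames)  # [0, 12] share the same starting noise

With the method's defaults (context_size=16, blend_frames=4) and 32 frames, frames 0 and 12 end up with identical starting noise; frame 24 would also be included once num_frames reaches 36. In the pipeline itself, the branch is selected by passing latent_mode="same_start", with blend_frames wired to the existing overlap value, as the second hunk shows; the full __call__ signature is not part of this diff.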