smoothieAI committed on
Commit
663e8f9
·
verified ·
1 Parent(s): 0359c79

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +59 -1
pipeline.py CHANGED
@@ -539,6 +539,51 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
539
  latents = latents * self.scheduler.init_noise_sigma
540
  return latents
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  @torch.no_grad()
543
  # @replace_example_docstring(EXAMPLE_DOC_STRING)
544
  def __call__(
@@ -695,7 +740,18 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
695
 
696
  # 5. Prepare latent variables
697
  num_channels_latents = self.unet.config.in_channels
698
- latents = self.prepare_latents(
 
 
 
 
 
 
 
 
 
 
 
699
  batch_size * num_videos_per_prompt,
700
  num_channels_latents,
701
  num_frames,
@@ -705,6 +761,8 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
705
  device,
706
  generator,
707
  latents,
 
 
708
  )
709
 
710
  # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
 
539
  latents = latents * self.scheduler.init_noise_sigma
540
  return latents
541
 
542
def prepare_motion_latents(self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator,
                           latents=None, x_velocity=0, y_velocity=0, scale_velocity=0):
    """Prepare initial video latents with a simple per-frame pan/zoom motion prior.

    Frame ``f`` is shifted by ``f * x_velocity`` / ``f * y_velocity`` latent
    pixels (wrap-around via ``torch.roll``) and optionally zoomed by a factor
    ``1 + f * scale_velocity``, biasing the initial noise toward camera motion.

    Args:
        batch_size: Number of samples to generate.
        num_channels_latents: Latent channel count (the UNet's ``in_channels``).
        num_frames: Number of video frames.
        height: Output image height in pixels; divided by ``self.vae_scale_factor``.
        width: Output image width in pixels; divided by ``self.vae_scale_factor``.
        dtype: Latent dtype.
        device: Target device for the latents.
        generator: ``torch.Generator`` or a list of generators (one per batch item).
        latents: Optional pre-made latents of shape
            ``(batch, channels, frames, h, w)``; fresh noise is drawn when ``None``.
        x_velocity: Horizontal shift per frame, in latent pixels. May be
            fractional; the per-frame offset is rounded to the nearest integer
            because ``torch.roll`` requires integer shifts.
        y_velocity: Vertical shift per frame, in latent pixels (same rounding).
        scale_velocity: Zoom increment per frame.

    Returns:
        Latents of shape ``(batch, channels, frames, h, w)``, scaled by the
        scheduler's ``init_noise_sigma``.

    Raises:
        ValueError: If ``generator`` is a list whose length != ``batch_size``.
    """
    shape = (
        batch_size,
        num_channels_latents,
        num_frames,
        height // self.vae_scale_factor,
        width // self.vae_scale_factor,
    )
    if isinstance(generator, list) and len(generator) != batch_size:
        raise ValueError(
            f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
            f" size of {batch_size}. Make sure the batch size matches the length of the generators."
        )

    if latents is None:
        latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
    else:
        latents = latents.to(device)

    # scale the initial noise by the standard deviation required by the scheduler
    latents = latents * self.scheduler.init_noise_sigma

    # Apply motion and scale dynamics frame by frame.
    for frame in range(num_frames):
        # torch.roll requires integer shifts; round so fractional velocities
        # (e.g. 0.1 px/frame) accumulate instead of crashing.
        x_offset = int(round(frame * x_velocity))
        y_offset = int(round(frame * y_velocity))
        scale_factor = 1 + frame * scale_velocity

        # Pan: wrap-around shift along width (dim 3) then height (dim 2).
        latents[:, :, frame] = torch.roll(latents[:, :, frame], shifts=x_offset, dims=3)
        latents[:, :, frame] = torch.roll(latents[:, :, frame], shifts=y_offset, dims=2)

        # Zoom: resample the (B, C, h, w) frame, then center-crop (zoom in) or
        # center-pad with zeros (zoom out) so the result fits back into the
        # original grid. This is a simple approach and might not be ideal for
        # all applications.
        if scale_factor != 1:
            frame_latents = latents[:, :, frame]  # already 4-D: (B, C, h, w)
            h, w = frame_latents.shape[2], frame_latents.shape[3]
            scaled = torch.nn.functional.interpolate(
                frame_latents,
                size=(max(1, int(h * scale_factor)), max(1, int(w * scale_factor))),
                mode="bilinear",
                align_corners=False,
            )
            sh, sw = scaled.shape[2], scaled.shape[3]
            if sh >= h and sw >= w:
                # Zoom in: keep the centered h x w crop.
                top, left = (sh - h) // 2, (sw - w) // 2
                latents[:, :, frame] = scaled[:, :, top:top + h, left:left + w]
            else:
                # Zoom out: paste the shrunken frame centered on zeros.
                padded = torch.zeros_like(frame_latents)
                top, left = (h - sh) // 2, (w - sw) // 2
                padded[:, :, top:top + sh, left:left + sw] = scaled
                latents[:, :, frame] = padded

    return latents
586
+
587
  @torch.no_grad()
588
  # @replace_example_docstring(EXAMPLE_DOC_STRING)
589
  def __call__(
 
740
 
741
  # 5. Prepare latent variables
742
  num_channels_latents = self.unet.config.in_channels
743
+ # latents = self.prepare_latents(
744
+ # batch_size * num_videos_per_prompt,
745
+ # num_channels_latents,
746
+ # num_frames,
747
+ # height,
748
+ # width,
749
+ # prompt_embeds.dtype,
750
+ # device,
751
+ # generator,
752
+ # latents,
753
+ # )
754
+ latents = self.prepare_motion_latents(
755
  batch_size * num_videos_per_prompt,
756
  num_channels_latents,
757
  num_frames,
 
761
  device,
762
  generator,
763
  latents,
764
+ x_velocity=0.1,
765
+ y_velocity=0.1,
766
  )
767
 
768
  # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline