smoothieAI committed
Commit 81489e7 · 1 Parent(s): 54b1758

Update pipeline.py

Files changed (1)
  1. pipeline.py +60 -11
pipeline.py CHANGED
@@ -1,3 +1,44 @@
 # Copyright 2023 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -58,7 +99,6 @@ EXAMPLE_DOC_STRING = """
     >>> import torch
     >>> from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler
     >>> from diffusers.utils import export_to_gif
-
     >>> adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
     >>> pipe = AnimateDiffPipeline.from_pretrained("frankjoshua/toonyou_beta6", motion_adapter=adapter)
     >>> pipe.scheduler = DDIMScheduler(beta_schedule="linear", steps_offset=1, clip_sample=False)
@@ -92,16 +132,13 @@ class AnimateDiffPipelineOutput(BaseOutput):
 class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
     r"""
     Pipeline for text-to-video generation.
-
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).
-
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
         - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
-
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
@@ -170,7 +207,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
-
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 prompt to be encoded
@@ -422,12 +458,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
     def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
         r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
-
         The suffixes after the scaling factors represent the stages where they are being applied.
-
         Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
         that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
-
         Args:
             s1 (`float`):
                 Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
@@ -566,6 +599,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
         ip_adapter_image: Optional[PipelineImageInput] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: Optional[int] = 1,
@@ -574,7 +608,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
     ):
         r"""
         The call function to the pipeline for generation.
-
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
@@ -631,7 +664,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
         Examples:
-
         Returns:
             [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] or `tuple`:
                 If `return_dict` is `True`, [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] is
@@ -780,6 +812,23 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         if output_type == "latent":
             return AnimateDiffPipelineOutput(frames=latents)

         # Post-processing
         video_tensor = self.decode_latents(latents)
@@ -794,4 +843,4 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         if not return_dict:
             return (video,)

-        return AnimateDiffPipelineOutput(frames=video)
 
smoothieAI / pipeline_animatediff_context · pipeline.py (apache-2.0, 40.1 kB) — updated file
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
 
    >>> import torch
    >>> from diffusers import MotionAdapter, AnimateDiffPipeline, DDIMScheduler
    >>> from diffusers.utils import export_to_gif
    >>> adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
    >>> pipe = AnimateDiffPipeline.from_pretrained("frankjoshua/toonyou_beta6", motion_adapter=adapter)
    >>> pipe.scheduler = DDIMScheduler(beta_schedule="linear", steps_offset=1, clip_sample=False)
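The docstring example stops after the scheduler swap. A minimal sketch of how the call might continue; the prompt, frame count, and output file name are illustrative, not taken from this file:

    >>> output = pipe(prompt="masterpiece, best quality, a corgi running on the beach", num_frames=16, num_inference_steps=25)
    >>> frames = output.frames[0]
    >>> export_to_gif(frames, "animation.gif")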
 
class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
    r"""
    Pipeline for text-to-video generation.
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).
    The pipeline also inherits the following loading methods:
        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
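The loader mixins listed in the docstring map to calls of roughly this shape on an instantiated pipeline; the repository IDs, file names, and token below are placeholders rather than values suggested by this repo:

pipe.load_lora_weights("some-user/animatediff-style-lora", weight_name="lora.safetensors")     # LoraLoaderMixin
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")  # IPAdapterMixin
pipe.load_textual_inversion("path/to/learned_embeds.bin", token="<my-concept>")                # TextualInversionLoaderMixin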
 
    ):
        r"""
        Encodes the prompt into text encoder hidden states.
        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
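In recent diffusers releases this helper returns the positive and negative embeddings as a pair; a sketch of the typical internal call, with argument names that may differ slightly from this file:

prompt_embeds, negative_prompt_embeds = self.encode_prompt(
    prompt,
    device,
    num_videos_per_prompt,
    do_classifier_free_guidance,
    negative_prompt,
)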
 
    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
    def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
        r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
        The suffixes after the scaling factors represent the stages where they are being applied.
        Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
        that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
        Args:
            s1 (`float`):
                Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
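As a usage sketch, FreeU is toggled on the loaded pipeline; the values below are the ones commonly quoted for Stable Diffusion v1.5-style checkpoints and should be checked against the FreeU repository for the model actually in use:

pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.5, b2=1.6)  # rescale skip and backbone features in the UNet up blocks
# ... run generation ...
pipe.disable_freeu()  # restore default UNet behaviour (assuming the matching disable helper is present)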
 
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
        ip_adapter_image: Optional[PipelineImageInput] = None,
        output_type: Optional[str] = "pil",
+       output_path: Optional[str] = None,
        return_dict: bool = True,
        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
        callback_steps: Optional[int] = 1,
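With the new `output_path` argument, a call might look like the sketch below; the saving branch added near the end of `__call__` expects the path to contain a single contiguous run of digits, which it uses as the starting frame number (the prompt and path are made up):

pipe(prompt="a rocket lifting off", num_frames=32, output_path="frames/frame_00000.png")
# frames are decoded in chunks of 10 and written as frames/frame_00000.png, frame_00001.png, ...;
# in this mode __call__ returns the path template instead of an AnimateDiffPipelineOutput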
 
    ):
        r"""
        The call function to the pipeline for generation.
        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
 
                Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                the output of the pre-final layer will be used for computing the prompt embeddings.
        Examples:
        Returns:
            [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] is
 
        if output_type == "latent":
            return AnimateDiffPipelineOutput(frames=latents)

+        # save frames to disk incrementally instead of returning them
+        if output_path is not None:
+            output_batch_size = 10  # decode in chunks to avoid out-of-memory errors on long videos
+            num_frames = latents.size(2)  # latents' shape is [batch, channels, frames, height, width]
+            # the contiguous digit run in output_path (e.g. "frame_00000.png") gives the starting frame number
+            digit_substring = ''.join(filter(str.isdigit, output_path))
+            for start_idx in range(0, num_frames, output_batch_size):
+                end_idx = min(start_idx + output_batch_size, num_frames)
+                video_tensor = self.decode_latents(latents[:, :, start_idx:end_idx, :, :])
+                video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
+
+                for frame_batch in video:
+                    for frame_idx, frame in enumerate(frame_batch[0][0]):
+                        # number each file by its absolute frame index so frames within a chunk don't overwrite each other
+                        frame_number = int(digit_substring) + start_idx + frame_idx
+                        new_output_path = output_path.replace(digit_substring, str(frame_number).zfill(5), 1)
+                        frame.save(new_output_path)
+            return output_path
+
        # Post-processing
        video_tensor = self.decode_latents(latents)

        if not return_dict:
            return (video,)

+        return AnimateDiffPipelineOutput(frames=video)
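A quick, self-contained illustration of the numbering scheme used in the frame-saving block above (paths are made up): the digit run is extracted once and spliced back in per frame, so a path whose digits are split across several places (say "out2/shot_00120.png") no longer contains the extracted string verbatim and every frame would be written to the same file.

# illustration only, runnable on its own
output_path = "out/shot_00120.png"
digits = ''.join(filter(str.isdigit, output_path))                      # "00120"
print(output_path.replace(digits, str(int(digits) + 10).zfill(5), 1))   # out/shot_00130.png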