AideepImage committed on
Commit
99df5c4
Β·
verified Β·
1 Parent(s): 8521ef5

Update txt2panoimg/pipeline_base.py

Browse files
Files changed (1) hide show
  1. txt2panoimg/pipeline_base.py +1 -18
txt2panoimg/pipeline_base.py CHANGED
@@ -1,8 +1,3 @@
1
- # Copyright Β© Alibaba, Inc. and its affiliates.
2
- # The implementation here is modifed based on diffusers.StableDiffusionPipeline,
3
- # originally Apache 2.0 License and public available at
4
- # https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
5
-
6
  import re
7
  from typing import Any, Callable, Dict, List, Optional, Union
8
 
@@ -124,7 +119,6 @@ def get_prompts_with_weights(pipe: DiffusionPipeline, prompt: List[str],
124
  max_length: int):
125
  r"""
126
  Tokenize a list of prompts and return its tokens with weights of each token.
127
-
128
  No padding, starting or ending token is included.
129
  """
130
  tokens = []
@@ -248,9 +242,7 @@ def get_weighted_text_embeddings(
248
  Prompts can be assigned with local weights using brackets. For example,
249
  prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
250
  and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
251
-
252
  Also, to regularize of the embedding, the weighted embedding would be scaled to preserve the original mean.
253
-
254
  Args:
255
  pipe (`DiffusionPipeline`):
256
  Pipe to provide access to the tokenizer and the text encoder.
@@ -408,18 +400,14 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
408
  class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
409
  r"""
410
  Pipeline for text-to-image generation using Stable Diffusion.
411
-
412
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
413
  library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
414
-
415
  In addition the pipeline inherits the following loading methods:
416
  - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
417
  - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
418
  - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
419
-
420
  as well as the following saving methods:
421
  - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
422
-
423
  Args:
424
  vae ([`AutoencoderKL`]):
425
  Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
@@ -457,7 +445,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
457
  ):
458
  r"""
459
  Encodes the prompt into text encoder hidden states.
460
-
461
  Args:
462
  prompt (`str` or `list(int)`):
463
  prompt to be encoded
@@ -572,7 +559,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
572
  ):
573
  r"""
574
  Function invoked when calling the pipeline for generation.
575
-
576
  Args:
577
  prompt (`str` or `List[str]`, *optional*):
578
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
@@ -634,9 +620,7 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
634
  Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `Ο†` in equation 16. of
635
  [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
636
  Guidance rescale factor should fix overexposure when using zero terminal SNR.
637
-
638
  Examples:
639
-
640
  Returns:
641
  [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
642
  [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
@@ -651,7 +635,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
651
  return_dict: bool = True
652
  ) -> Union[DecoderOutput, torch.FloatTensor]:
653
  r"""Decode a batch of images using a tiled decoder.
654
-
655
  Args:
656
  When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
657
  steps. This is useful to keep memory use constant regardless of image size.
@@ -846,4 +829,4 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
846
  return (image, has_nsfw_concept)
847
 
848
  return StableDiffusionPipelineOutput(
849
- images=image, nsfw_content_detected=has_nsfw_concept)
 
 
 
 
 
 
1
  import re
2
  from typing import Any, Callable, Dict, List, Optional, Union
3
 
 
119
  max_length: int):
120
  r"""
121
  Tokenize a list of prompts and return its tokens with weights of each token.
 
122
  No padding, starting or ending token is included.
123
  """
124
  tokens = []
 
242
  Prompts can be assigned with local weights using brackets. For example,
243
  prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
244
  and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
 
245
  Also, to regularize of the embedding, the weighted embedding would be scaled to preserve the original mean.
 
246
  Args:
247
  pipe (`DiffusionPipeline`):
248
  Pipe to provide access to the tokenizer and the text encoder.
 
400
  class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
401
  r"""
402
  Pipeline for text-to-image generation using Stable Diffusion.
 
403
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
404
  library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
 
405
  In addition the pipeline inherits the following loading methods:
406
  - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
407
  - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
408
  - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
 
409
  as well as the following saving methods:
410
  - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
 
411
  Args:
412
  vae ([`AutoencoderKL`]):
413
  Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
 
445
  ):
446
  r"""
447
  Encodes the prompt into text encoder hidden states.
 
448
  Args:
449
  prompt (`str` or `list(int)`):
450
  prompt to be encoded
 
559
  ):
560
  r"""
561
  Function invoked when calling the pipeline for generation.
 
562
  Args:
563
  prompt (`str` or `List[str]`, *optional*):
564
  The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
 
620
  Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `Ο†` in equation 16. of
621
  [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
622
  Guidance rescale factor should fix overexposure when using zero terminal SNR.
 
623
  Examples:
 
624
  Returns:
625
  [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
626
  [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
 
635
  return_dict: bool = True
636
  ) -> Union[DecoderOutput, torch.FloatTensor]:
637
  r"""Decode a batch of images using a tiled decoder.
 
638
  Args:
639
  When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
640
  steps. This is useful to keep memory use constant regardless of image size.
 
829
  return (image, has_nsfw_concept)
830
 
831
  return StableDiffusionPipelineOutput(
832
+ images=image, nsfw_content_detected=has_nsfw_concept)