Spaces:
Runtime error
Runtime error
Update txt2panoimg/pipeline_base.py
Browse files- txt2panoimg/pipeline_base.py +1 -18
txt2panoimg/pipeline_base.py
CHANGED
@@ -1,8 +1,3 @@
|
|
1 |
-
# Copyright © Alibaba, Inc. and its affiliates.
|
2 |
-
# The implementation here is modified based on diffusers.StableDiffusionPipeline,
|
3 |
-
# originally Apache 2.0 License and publicly available at
|
4 |
-
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
|
5 |
-
|
6 |
import re
|
7 |
from typing import Any, Callable, Dict, List, Optional, Union
|
8 |
|
@@ -124,7 +119,6 @@ def get_prompts_with_weights(pipe: DiffusionPipeline, prompt: List[str],
|
|
124 |
max_length: int):
|
125 |
r"""
|
126 |
Tokenize a list of prompts and return its tokens with weights of each token.
|
127 |
-
|
128 |
No padding, starting or ending token is included.
|
129 |
"""
|
130 |
tokens = []
|
@@ -248,9 +242,7 @@ def get_weighted_text_embeddings(
|
|
248 |
Prompts can be assigned with local weights using brackets. For example,
|
249 |
prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
|
250 |
and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
|
251 |
-
|
252 |
Also, to regularize the embedding, the weighted embedding is scaled to preserve the original mean.
|
253 |
-
|
254 |
Args:
|
255 |
pipe (`DiffusionPipeline`):
|
256 |
Pipe to provide access to the tokenizer and the text encoder.
|
@@ -408,18 +400,14 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
408 |
class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
409 |
r"""
|
410 |
Pipeline for text-to-image generation using Stable Diffusion.
|
411 |
-
|
412 |
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
413 |
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
414 |
-
|
415 |
In addition the pipeline inherits the following loading methods:
|
416 |
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
|
417 |
- *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
|
418 |
- *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
|
419 |
-
|
420 |
as well as the following saving methods:
|
421 |
- *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
|
422 |
-
|
423 |
Args:
|
424 |
vae ([`AutoencoderKL`]):
|
425 |
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
@@ -457,7 +445,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
|
457 |
):
|
458 |
r"""
|
459 |
Encodes the prompt into text encoder hidden states.
|
460 |
-
|
461 |
Args:
|
462 |
prompt (`str` or `list(int)`):
|
463 |
prompt to be encoded
|
@@ -572,7 +559,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
|
572 |
):
|
573 |
r"""
|
574 |
Function invoked when calling the pipeline for generation.
|
575 |
-
|
576 |
Args:
|
577 |
prompt (`str` or `List[str]`, *optional*):
|
578 |
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
@@ -634,9 +620,7 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
|
634 |
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
635 |
[Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
636 |
Guidance rescale factor should fix overexposure when using zero terminal SNR.
|
637 |
-
|
638 |
Examples:
|
639 |
-
|
640 |
Returns:
|
641 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
642 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
|
@@ -651,7 +635,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
|
651 |
return_dict: bool = True
|
652 |
) -> Union[DecoderOutput, torch.FloatTensor]:
|
653 |
r"""Decode a batch of images using a tiled decoder.
|
654 |
-
|
655 |
Args:
|
656 |
When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
|
657 |
steps. This is useful to keep memory use constant regardless of image size.
|
@@ -846,4 +829,4 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
|
846 |
return (image, has_nsfw_concept)
|
847 |
|
848 |
return StableDiffusionPipelineOutput(
|
849 |
-
images=image, nsfw_content_detected=has_nsfw_concept)
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import re
|
2 |
from typing import Any, Callable, Dict, List, Optional, Union
|
3 |
|
|
|
119 |
max_length: int):
|
120 |
r"""
|
121 |
Tokenize a list of prompts and return its tokens with weights of each token.
|
|
|
122 |
No padding, starting or ending token is included.
|
123 |
"""
|
124 |
tokens = []
|
|
|
242 |
Prompts can be assigned with local weights using brackets. For example,
|
243 |
prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
|
244 |
and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
|
|
|
245 |
Also, to regularize the embedding, the weighted embedding is scaled to preserve the original mean.
|
|
|
246 |
Args:
|
247 |
pipe (`DiffusionPipeline`):
|
248 |
Pipe to provide access to the tokenizer and the text encoder.
|
|
|
400 |
class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
|
401 |
r"""
|
402 |
Pipeline for text-to-image generation using Stable Diffusion.
|
|
|
403 |
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
404 |
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
|
|
405 |
In addition the pipeline inherits the following loading methods:
|
406 |
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
|
407 |
- *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
|
408 |
- *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
|
|
|
409 |
as well as the following saving methods:
|
410 |
- *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
|
|
|
411 |
Args:
|
412 |
vae ([`AutoencoderKL`]):
|
413 |
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
|
|
445 |
):
|
446 |
r"""
|
447 |
Encodes the prompt into text encoder hidden states.
|
|
|
448 |
Args:
|
449 |
prompt (`str` or `list(int)`):
|
450 |
prompt to be encoded
|
|
|
559 |
):
|
560 |
r"""
|
561 |
Function invoked when calling the pipeline for generation.
|
|
|
562 |
Args:
|
563 |
prompt (`str` or `List[str]`, *optional*):
|
564 |
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
|
|
620 |
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
621 |
[Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
622 |
Guidance rescale factor should fix overexposure when using zero terminal SNR.
|
|
|
623 |
Examples:
|
|
|
624 |
Returns:
|
625 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
626 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
|
|
|
635 |
return_dict: bool = True
|
636 |
) -> Union[DecoderOutput, torch.FloatTensor]:
|
637 |
r"""Decode a batch of images using a tiled decoder.
|
|
|
638 |
Args:
|
639 |
When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
|
640 |
steps. This is useful to keep memory use constant regardless of image size.
|
|
|
829 |
return (image, has_nsfw_concept)
|
830 |
|
831 |
return StableDiffusionPipelineOutput(
|
832 |
+
images=image, nsfw_content_detected=has_nsfw_concept)
|