Spaces:
Runtime error
Runtime error
Update txt2panoimg/pipeline_sr.py
Browse files- txt2panoimg/pipeline_sr.py +1 -18
txt2panoimg/pipeline_sr.py
CHANGED
@@ -1,8 +1,3 @@
|
|
1 |
-
# Copyright © Alibaba, Inc. and its affiliates.
|
2 |
-
# The implementation here is modified based on diffusers.StableDiffusionControlNetImg2ImgPipeline,
|
3 |
-
# originally Apache 2.0 License and public available at
|
4 |
-
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
|
5 |
-
|
6 |
import copy
|
7 |
import re
|
8 |
from typing import Any, Callable, Dict, List, Optional, Union
|
@@ -53,7 +48,6 @@ EXAMPLE_DOC_STRING = """
|
|
53 |
... width=1536,
|
54 |
... control_image=image,
|
55 |
... ).images[0]
|
56 |
-
|
57 |
```
|
58 |
"""
|
59 |
|
@@ -141,7 +135,6 @@ def get_prompts_with_weights(pipe: DiffusionPipeline, prompt: List[str],
|
|
141 |
max_length: int):
|
142 |
r"""
|
143 |
Tokenize a list of prompts and return its tokens with weights of each token.
|
144 |
-
|
145 |
No padding, starting or ending token is included.
|
146 |
"""
|
147 |
tokens = []
|
@@ -265,9 +258,7 @@ def get_weighted_text_embeddings(
|
|
265 |
Prompts can be assigned with local weights using brackets. For example,
|
266 |
prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
|
267 |
and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
|
268 |
-
|
269 |
Also, to regularize the embedding, the weighted embedding is scaled to preserve the original mean.
|
270 |
-
|
271 |
Args:
|
272 |
pipe (`DiffusionPipeline`):
|
273 |
Pipe to provide access to the tokenizer and the text encoder.
|
@@ -434,13 +425,10 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
434 |
StableDiffusionControlNetImg2ImgPipeline):
|
435 |
r"""
|
436 |
Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance.
|
437 |
-
|
438 |
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
439 |
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
440 |
-
|
441 |
In addition the pipeline inherits the following loading methods:
|
442 |
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
|
443 |
-
|
444 |
Args:
|
445 |
vae ([`AutoencoderKL`]):
|
446 |
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
@@ -610,7 +598,6 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
610 |
):
|
611 |
r"""
|
612 |
Encodes the prompt into text encoder hidden states.
|
613 |
-
|
614 |
Args:
|
615 |
prompt (`str` or `list(int)`):
|
616 |
prompt to be encoded
|
@@ -813,7 +800,6 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
813 |
):
|
814 |
r"""
|
815 |
Function invoked when calling the pipeline for generation.
|
816 |
-
|
817 |
Args:
|
818 |
prompt (`str` or `List[str]`, *optional*):
|
819 |
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
@@ -889,9 +875,7 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
889 |
you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
|
890 |
context_size ('int', *optional*, defaults to '768'):
|
891 |
tiled size when denoise the latents.
|
892 |
-
|
893 |
Examples:
|
894 |
-
|
895 |
Returns:
|
896 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
897 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
|
@@ -906,7 +890,6 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
906 |
return_dict: bool = True
|
907 |
) -> Union[DecoderOutput, torch.FloatTensor]:
|
908 |
r"""Decode a batch of images using a tiled decoder.
|
909 |
-
|
910 |
Args:
|
911 |
When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
|
912 |
steps. This is useful to keep memory use constant regardless of image size. The end result of tiled
|
@@ -1199,4 +1182,4 @@ class StableDiffusionControlNetImg2ImgPanoPipeline(
|
|
1199 |
return (image, has_nsfw_concept)
|
1200 |
|
1201 |
return StableDiffusionPipelineOutput(
|
1202 |
-
images=image, nsfw_content_detected=has_nsfw_concept)
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import copy
|
2 |
import re
|
3 |
from typing import Any, Callable, Dict, List, Optional, Union
|
|
|
48 |
... width=1536,
|
49 |
... control_image=image,
|
50 |
... ).images[0]
|
|
|
51 |
```
|
52 |
"""
|
53 |
|
|
|
135 |
max_length: int):
|
136 |
r"""
|
137 |
Tokenize a list of prompts and return its tokens with weights of each token.
|
|
|
138 |
No padding, starting or ending token is included.
|
139 |
"""
|
140 |
tokens = []
|
|
|
258 |
Prompts can be assigned with local weights using brackets. For example,
|
259 |
prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
|
260 |
and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
|
|
|
261 |
Also, to regularize the embedding, the weighted embedding is scaled to preserve the original mean.
|
|
|
262 |
Args:
|
263 |
pipe (`DiffusionPipeline`):
|
264 |
Pipe to provide access to the tokenizer and the text encoder.
|
|
|
425 |
StableDiffusionControlNetImg2ImgPipeline):
|
426 |
r"""
|
427 |
Pipeline for text-to-image generation using Stable Diffusion with ControlNet guidance.
|
|
|
428 |
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
429 |
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
|
|
430 |
In addition the pipeline inherits the following loading methods:
|
431 |
- *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
|
|
|
432 |
Args:
|
433 |
vae ([`AutoencoderKL`]):
|
434 |
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
|
|
|
598 |
):
|
599 |
r"""
|
600 |
Encodes the prompt into text encoder hidden states.
|
|
|
601 |
Args:
|
602 |
prompt (`str` or `list(int)`):
|
603 |
prompt to be encoded
|
|
|
800 |
):
|
801 |
r"""
|
802 |
Function invoked when calling the pipeline for generation.
|
|
|
803 |
Args:
|
804 |
prompt (`str` or `List[str]`, *optional*):
|
805 |
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
|
|
|
875 |
you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
|
876 |
context_size ('int', *optional*, defaults to '768'):
|
877 |
tiled size when denoise the latents.
|
|
|
878 |
Examples:
|
|
|
879 |
Returns:
|
880 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
881 |
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
|
|
|
890 |
return_dict: bool = True
|
891 |
) -> Union[DecoderOutput, torch.FloatTensor]:
|
892 |
r"""Decode a batch of images using a tiled decoder.
|
|
|
893 |
Args:
|
894 |
When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
|
895 |
steps. This is useful to keep memory use constant regardless of image size. The end result of tiled
|
|
|
1182 |
return (image, has_nsfw_concept)
|
1183 |
|
1184 |
return StableDiffusionPipelineOutput(
|
1185 |
+
images=image, nsfw_content_detected=has_nsfw_concept)
|