Spaces:

AideepImage
/

360_Virtual_Image_prediction

Runtime error

App Files Files Community

AideepImage committed on Aug 3, 2024

Commit

99df5c4

verified ·

1 Parent(s): 8521ef5

Update txt2panoimg/pipeline_base.py

Browse files

Files changed (1) hide show

txt2panoimg/pipeline_base.py +1 -18

txt2panoimg/pipeline_base.py CHANGED Viewed

@@ -1,8 +1,3 @@
-# Copyright © Alibaba, Inc. and its affiliates.
-# The implementation here is modified based on diffusers.StableDiffusionPipeline,
-# originally Apache 2.0 License and public available at
-# https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
 import re
 from typing import Any, Callable, Dict, List, Optional, Union
@@ -124,7 +119,6 @@ def get_prompts_with_weights(pipe: DiffusionPipeline, prompt: List[str],
                              max_length: int):
     r"""
     Tokenize a list of prompts and return its tokens with weights of each token.
     No padding, starting or ending token is included.
     """
     tokens = []
@@ -248,9 +242,7 @@ def get_weighted_text_embeddings(
     Prompts can be assigned with local weights using brackets. For example,
     prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
     and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
     Also, to regularize the embedding, the weighted embedding is scaled to preserve the original mean.
     Args:
         pipe (`DiffusionPipeline`):
             Pipe to provide access to the tokenizer and the text encoder.
@@ -408,18 +400,14 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
 class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion.
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
         - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
         - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
     as well as the following saving methods:
         - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
@@ -457,7 +445,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
         Args:
             prompt (`str` or `list(int)`):
                 prompt to be encoded
@@ -572,7 +559,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
     ):
         r"""
         Function invoked when calling the pipeline for generation.
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
@@ -634,9 +620,7 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
                 Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
                 [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
                 Guidance rescale factor should fix overexposure when using zero terminal SNR.
         Examples:
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
@@ -651,7 +635,6 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
             return_dict: bool = True
         ) -> Union[DecoderOutput, torch.FloatTensor]:
             r"""Decode a batch of images using a tiled decoder.
             Args:
             When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
             steps. This is useful to keep memory use constant regardless of image size.
@@ -846,4 +829,4 @@ class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
             return (image, has_nsfw_concept)
         return StableDiffusionPipelineOutput(
-            images=image, nsfw_content_detected=has_nsfw_concept)

 import re
 from typing import Any, Callable, Dict, List, Optional, Union
                              max_length: int):
     r"""
     Tokenize a list of prompts and return its tokens with weights of each token.
     No padding, starting or ending token is included.
     """
     tokens = []
     Prompts can be assigned with local weights using brackets. For example,
     prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
     and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
     Also, to regularize the embedding, the weighted embedding is scaled to preserve the original mean.
     Args:
         pipe (`DiffusionPipeline`):
             Pipe to provide access to the tokenizer and the text encoder.
 class StableDiffusionBlendExtendPipeline(StableDiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion.
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
         - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
         - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
     as well as the following saving methods:
         - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
         Args:
             prompt (`str` or `list(int)`):
                 prompt to be encoded
     ):
         r"""
         Function invoked when calling the pipeline for generation.
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                 Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
                 [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
                 Guidance rescale factor should fix overexposure when using zero terminal SNR.
         Examples:
         Returns:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
             [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
             return_dict: bool = True
         ) -> Union[DecoderOutput, torch.FloatTensor]:
             r"""Decode a batch of images using a tiled decoder.
             Args:
             When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several
             steps. This is useful to keep memory use constant regardless of image size.
             return (image, has_nsfw_concept)
         return StableDiffusionPipelineOutput(
+            images=image, nsfw_content_detected=has_nsfw_concept)