# coding=utf-8
import functools
import importlib
import inspect
import os
from collections import OrderedDict
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

import numpy as np
import PIL
import torch
from torch import nn

import diffusers
from accelerate.utils.versions import is_torch_version
from huggingface_hub import snapshot_download
from packaging import version
from PIL import Image
from tqdm.auto import tqdm

from diffusers.configuration_utils import ConfigMixin, register_to_config
from diffusers.dynamic_modules_utils import get_class_from_dynamic_module
from diffusers.hub_utils import http_user_agent
from diffusers.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, ModelMixin
from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
from diffusers.utils import (
    CONFIG_NAME,
    DIFFUSERS_CACHE,
    ONNX_WEIGHTS_NAME,
    WEIGHTS_NAME,
    BaseOutput,
    deprecate,
    is_transformers_available,
    logging,
)


if is_transformers_available():
    import transformers
    from transformers import PreTrainedModel


INDEX_FILE = "diffusion_pytorch_model.bin"
CUSTOM_PIPELINE_FILE_NAME = "pipeline.py"
DUMMY_MODULES_FOLDER = "diffusers.utils"

logger = logging.get_logger(__name__)
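
# For each supported library: the base classes a pipeline component may subclass,
# mapped to the [save_method, load_method] names used to serialize and restore it.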
LOADABLE_CLASSES = {
    "diffusers": {
        "ModelMixin": ["save_pretrained", "from_pretrained"],
        "SchedulerMixin": ["save_config", "from_config"],
        "DiffusionPipeline": ["save_pretrained", "from_pretrained"],
        "OnnxRuntimeModel": ["save_pretrained", "from_pretrained"],
    },
    "transformers": {
        "PreTrainedTokenizer": ["save_pretrained", "from_pretrained"],
        "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"],
        "PreTrainedModel": ["save_pretrained", "from_pretrained"],
        "FeatureExtractionMixin": ["save_pretrained", "from_pretrained"],
    },
    "LdmZhPipeline": {
        "WukongClipTextEncoder": ["save_pretrained", "from_pretrained"],
        "ESRGAN": ["save_pretrained", "from_pretrained"],
    },
}

ALL_IMPORTABLE_CLASSES = {}
for library in LOADABLE_CLASSES:
    ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES[library])


@dataclass
class ImagePipelineOutput(BaseOutput):
    """
    Output class for image pipelines.

    Args:
        images (`List[PIL.Image.Image]` or `np.ndarray`)
            List of denoised PIL images of length `batch_size` or a NumPy array of shape `(batch_size, height,
            width, num_channels)` containing the denoised images of the diffusion pipeline.
    """

    images: Union[List[PIL.Image.Image], np.ndarray]


@dataclass
class AudioPipelineOutput(BaseOutput):
    """
    Output class for audio pipelines.

    Args:
        audios (`np.ndarray`)
            NumPy array of denoised audio samples of shape `(batch_size, num_channels, sample_rate)`.
    """

    audios: np.ndarray


class DiffusionPipeline(ConfigMixin):
    r"""
    Base class for all pipelines.

    [`DiffusionPipeline`] stores all components (models, schedulers, and processors) of a diffusion pipeline and
    provides methods for loading, downloading and saving models, as well as methods common to all pipelines to:

        - move all PyTorch modules to the device of your choice
        - enable/disable the progress bar for the denoising iteration

    Class attributes:

        - **config_name** (`str`) -- name of the config file that stores the class and module names of all
          components of the diffusion pipeline.
    """
    config_name = "model_index.json"

    def register_modules(self, **kwargs):
        # import it here to avoid circular import
        from diffusers import pipelines

        for name, module in kwargs.items():
            # retrieve library
            if module is None:
                register_dict = {name: (None, None)}
            else:
                library = module.__module__.split(".")[0]

                # check if the module is a pipeline module
                pipeline_dir = module.__module__.split(".")[-2] if len(module.__module__.split(".")) > 2 else None
                path = module.__module__.split(".")
                is_pipeline_module = pipeline_dir in path and hasattr(pipelines, pipeline_dir)

                # if library is not in LOADABLE_CLASSES, then it is a custom module.
                # Or if it's a pipeline module, then the module is inside the pipeline
                # folder so we set the library to module name.
                if library not in LOADABLE_CLASSES or is_pipeline_module:
                    library = pipeline_dir

                # retrieve class_name
                class_name = module.__class__.__name__

                register_dict = {name: (library, class_name)}

            # save model index config
            self.register_to_config(**register_dict)

            # set models
            setattr(self, name, module)

    def save_pretrained(self, save_directory: Union[str, os.PathLike]):
        """
        Save all variables of the pipeline that can be saved and loaded as well as the pipeline's configuration file
        to a directory. A pipeline variable can be saved and loaded if its class implements both a save and a load
        method. The pipeline can easily be re-loaded using the [`~DiffusionPipeline.from_pretrained`] class method.

        Arguments:
            save_directory (`str` or `os.PathLike`):
                Directory to which to save. Will be created if it doesn't exist.
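
        Example (a minimal sketch; assumes `pipe` is an already-instantiated pipeline):

        ```py
        >>> pipe.save_pretrained("./my_pipeline_directory")
        >>> # each component with a save/load pair registered in `LOADABLE_CLASSES`
        >>> # is written to its own sub-folder and can be re-loaded with:
        >>> pipe = DiffusionPipeline.from_pretrained("./my_pipeline_directory")
        ```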
""" | |
        self.save_config(save_directory)

        model_index_dict = dict(self.config)
        model_index_dict.pop("_class_name")
        model_index_dict.pop("_diffusers_version")
        model_index_dict.pop("_module", None)

        for pipeline_component_name in model_index_dict.keys():
            sub_model = getattr(self, pipeline_component_name)
            if sub_model is None:
                # edge case for saving a pipeline with safety_checker=None
                continue

            model_cls = sub_model.__class__

            save_method_name = None
            # search for the model's base class in LOADABLE_CLASSES
            for library_name, library_classes in LOADABLE_CLASSES.items():
                library = importlib.import_module(library_name)
                for base_class, save_load_methods in library_classes.items():
                    class_candidate = getattr(library, base_class)
                    if issubclass(model_cls, class_candidate):
                        # if we found a suitable base class in LOADABLE_CLASSES then grab its save method
                        save_method_name = save_load_methods[0]
                        break
                if save_method_name is not None:
                    break

            save_method = getattr(sub_model, save_method_name)
            save_method(os.path.join(save_directory, pipeline_component_name))

    def to(self, torch_device: Optional[Union[str, torch.device]] = None):
        if torch_device is None:
            return self

        module_names, _ = self.extract_init_dict(dict(self.config))
        for name in module_names.keys():
            module = getattr(self, name)
            if isinstance(module, torch.nn.Module):
                if module.dtype == torch.float16 and str(torch_device) in ["cpu", "mps"]:
                    logger.warning(
                        "Pipelines loaded with `torch_dtype=torch.float16` cannot run on the `cpu` or `mps`"
                        " devices, due to the lack of support for `float16` operations on them in PyTorch. It is"
                        " not recommended to move the pipeline there, as running it will fail. Please remove the"
                        " `torch_dtype=torch.float16` argument, or use a `cuda` device to run inference."
                    )
                module.to(torch_device)
        return self

    @property
    def device(self) -> torch.device:
        r"""
        Returns:
            `torch.device`: The torch device on which the pipeline is located.
        """
        module_names, _ = self.extract_init_dict(dict(self.config))
        for name in module_names.keys():
            module = getattr(self, name)
            if isinstance(module, torch.nn.Module):
                # if module.device == torch.device("meta"):
                #     return torch.device("cpu")
                return module.device
        return torch.device("cpu")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
        r"""
        Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights.

        The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).

        The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
        pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
        task.

        The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those
        weights are discarded.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*):
                Can be either:

                    - A string, the *repo id* of a pretrained pipeline hosted inside a model repo on
                      https://huggingface.co/. Valid repo ids have to be located under a user or organization name,
                      like `CompVis/ldm-text2im-large-256`.
                    - A path to a *directory* containing pipeline weights saved using
                      [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`.
            torch_dtype (`str` or `torch.dtype`, *optional*):
                Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the
                dtype will be automatically derived from the model's weights.
            custom_pipeline (`str`, *optional*):

                <Tip warning={true}>

                This is an experimental feature and is likely to change in the future.

                </Tip>

                Can be either:

                    - A string, the *repo id* of a custom pipeline hosted inside a model repo on
                      https://huggingface.co/. Valid repo ids have to be located under a user or organization name,
                      like `hf-internal-testing/diffusers-dummy-pipeline`.

                      <Tip>

                      It is required that the model repo has a file called `pipeline.py` that defines the custom
                      pipeline.

                      </Tip>

                    - A string, the *file name* of a community pipeline hosted on GitHub under
                      https://github.com/huggingface/diffusers/tree/main/examples/community. Valid file names have to
                      exactly match the file name without `.py` located under the above link, *e.g.*
                      `clip_guided_stable_diffusion`.

                      <Tip>

                      Community pipelines are always loaded from the current `main` branch of GitHub.

                      </Tip>

                    - A path to a *directory* containing a custom pipeline, e.g., `./my_pipeline_directory/`.

                      <Tip>

                      It is required that the directory has a file called `pipeline.py` that defines the custom
                      pipeline.

                      </Tip>

                For more information on how to load and create custom pipelines, please have a look at [Loading and
                Creating Custom
                Pipelines](https://huggingface.co/docs/diffusers/main/en/using-diffusers/custom_pipelines)
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to resume downloading the model weights and configuration files. If set to `False`,
                incompletely received files are deleted.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            output_loading_info (`bool`, *optional*, defaults to `False`):
                Whether or not to also return a dictionary containing missing keys, unexpected keys and error
                messages.
            local_files_only (`bool`, *optional*, defaults to `False`):
                Whether or not to only look at local files (i.e., do not try to download the model).
            use_auth_token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token
                generated when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            mirror (`str`, *optional*):
                Mirror source to accelerate downloads in China. If you are from China and have an accessibility
                problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety
                of the mirror. Please refer to the mirror site for more information.
            device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
                A map that specifies where each submodule should go. It doesn't need to be refined to each
                parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the
                same device.

                To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For
                more information about each option see [designing a device
                map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
                Speed up model loading by not initializing the weights and only loading the pre-trained weights. This
                also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the
                model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
                setting this argument to `True` will raise an error.
            kwargs (remaining dictionary of keyword arguments, *optional*):
                Can be used to overwrite load- and saveable variables - *i.e.* the pipeline components - of the
                specific pipeline class. The overwritten components are then directly passed to the pipeline's
                `__init__` method. See example below for more information.

        <Tip>

        It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated
        models](https://huggingface.co/docs/hub/models-gated#gated-models), *e.g.* `"runwayml/stable-diffusion-v1-5"`

        </Tip>

        <Tip>

        Activate the special ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use
        this method in a firewalled environment.

        </Tip>

        Examples:

        ```py
        >>> from diffusers import DiffusionPipeline

        >>> # Download pipeline from huggingface.co and cache.
        >>> pipeline = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")

        >>> # Download pipeline that requires an authorization token.
        >>> # For more information on access tokens, please refer to this section
        >>> # of the documentation: https://huggingface.co/docs/hub/security-tokens
        >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

        >>> # Download pipeline, but overwrite the scheduler
        >>> from diffusers import LMSDiscreteScheduler

        >>> scheduler = LMSDiscreteScheduler.from_config("runwayml/stable-diffusion-v1-5", subfolder="scheduler")
        >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", scheduler=scheduler)
        ```
        """
        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
        resume_download = kwargs.pop("resume_download", False)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        local_files_only = kwargs.pop("local_files_only", False)
        use_auth_token = kwargs.pop("use_auth_token", None)
        revision = kwargs.pop("revision", None)
        torch_dtype = kwargs.pop("torch_dtype", None)
        custom_pipeline = kwargs.pop("custom_pipeline", None)
        provider = kwargs.pop("provider", None)
        sess_options = kwargs.pop("sess_options", None)
        device_map = kwargs.pop("device_map", None)
        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)

        if device_map is not None and not is_torch_version(">=", "1.9.0"):
            raise NotImplementedError(
                "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set"
                " `device_map=None`."
            )

        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
            raise NotImplementedError(
                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
                " `low_cpu_mem_usage=False`."
            )

        if low_cpu_mem_usage is False and device_map is not None:
            raise ValueError(
                f"You cannot set `low_cpu_mem_usage` to False while using device_map={device_map} for loading and"
                " dispatching. Please make sure to set `low_cpu_mem_usage=True`."
            )

        # 1. Download the checkpoints and configs
        # use snapshot download here to get it working from from_pretrained
        if not os.path.isdir(pretrained_model_name_or_path):
            config_dict = cls.get_config_dict(
                pretrained_model_name_or_path,
                cache_dir=cache_dir,
                resume_download=resume_download,
                force_download=force_download,
                proxies=proxies,
                local_files_only=local_files_only,
                use_auth_token=use_auth_token,
                revision=revision,
            )

            # make sure we only download sub-folders and `diffusers` filenames
            folder_names = [k for k in config_dict.keys() if not k.startswith("_")]
            allow_patterns = [os.path.join(k, "*") for k in folder_names]
            allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, ONNX_WEIGHTS_NAME, cls.config_name]

            # make sure we don't download flax weights
            ignore_patterns = "*.msgpack"

            if custom_pipeline is not None:
                allow_patterns += [CUSTOM_PIPELINE_FILE_NAME]

            if cls != DiffusionPipeline:
                requested_pipeline_class = cls.__name__
            else:
                requested_pipeline_class = config_dict.get("_class_name", cls.__name__)
            user_agent = {"pipeline_class": requested_pipeline_class}
            if custom_pipeline is not None:
                user_agent["custom_pipeline"] = custom_pipeline
            user_agent = http_user_agent(user_agent)

            # download all allow_patterns
            cached_folder = snapshot_download(
                pretrained_model_name_or_path,
                cache_dir=cache_dir,
                resume_download=resume_download,
                proxies=proxies,
                local_files_only=local_files_only,
                use_auth_token=use_auth_token,
                revision=revision,
                allow_patterns=allow_patterns,
                ignore_patterns=ignore_patterns,
                user_agent=user_agent,
            )
        else:
            cached_folder = pretrained_model_name_or_path

        config_dict = cls.get_config_dict(cached_folder)

        # 2. Load the pipeline class, if using custom module then load it from the hub
        # if we load from explicit class, let's use it
        if custom_pipeline is not None:
            pipeline_class = get_class_from_dynamic_module(
                custom_pipeline, module_file=CUSTOM_PIPELINE_FILE_NAME, cache_dir=custom_pipeline
            )
        elif cls != DiffusionPipeline:
            pipeline_class = cls
        else:
            diffusers_module = importlib.import_module(cls.__module__.split(".")[0])
            pipeline_class = getattr(diffusers_module, config_dict["_class_name"])

        # To be removed in 1.0.0
        if pipeline_class.__name__ == "StableDiffusionInpaintPipeline" and version.parse(
            version.parse(config_dict["_diffusers_version"]).base_version
        ) <= version.parse("0.5.1"):
            from diffusers import StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy

            pipeline_class = StableDiffusionInpaintPipelineLegacy

            deprecation_message = (
                "You are using a legacy checkpoint for inpainting with Stable Diffusion, therefore we are loading the"
                f" {StableDiffusionInpaintPipelineLegacy} class instead of {StableDiffusionInpaintPipeline}. For"
                " better inpainting results, we strongly suggest using Stable Diffusion's official inpainting"
                " checkpoint: https://huggingface.co/runwayml/stable-diffusion-inpainting instead or adapting your"
                f" checkpoint {pretrained_model_name_or_path} to the format of"
                " https://huggingface.co/runwayml/stable-diffusion-inpainting. Note that we do not actively maintain"
                f" the {StableDiffusionInpaintPipelineLegacy} class and will likely remove it in version 1.0.0."
            )
            deprecate("StableDiffusionInpaintPipelineLegacy", "1.0.0", deprecation_message, standard_warn=False)

        # some modules can be passed directly to the init
        # in this case they are already instantiated in `kwargs`
        # extract them here
        expected_modules = set(inspect.signature(pipeline_class.__init__).parameters.keys()) - set(["self"])
        passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs}

        init_dict, unused_kwargs = pipeline_class.extract_init_dict(config_dict, **kwargs)

        if len(unused_kwargs) > 0:
            logger.warning(f"Keyword arguments {unused_kwargs} not recognized.")

        init_kwargs = {}

        # import it here to avoid circular import
        from diffusers import pipelines

        # 3. Load each module in the pipeline
        for name, (library_name, class_name) in init_dict.items():
            if class_name is None:
                # edge case for when the pipeline was saved with safety_checker=None
                init_kwargs[name] = None
                continue

            # 3.1 - now that JAX/Flax is an official framework of the library, we might load from Flax names
            if class_name.startswith("Flax"):
                class_name = class_name[4:]

            is_pipeline_module = hasattr(pipelines, library_name)
            loaded_sub_model = None
            sub_model_should_be_defined = True

            # if the model is in a pipeline module, then we load it from the pipeline
            if name in passed_class_obj:
                # 1. check that passed_class_obj has correct parent class
                if not is_pipeline_module:
                    library = importlib.import_module(library_name)
                    class_obj = getattr(library, class_name)
                    importable_classes = LOADABLE_CLASSES[library_name]
                    class_candidates = {c: getattr(library, c) for c in importable_classes.keys()}

                    expected_class_obj = None
                    for class_name, class_candidate in class_candidates.items():
                        if issubclass(class_obj, class_candidate):
                            expected_class_obj = class_candidate

                    if not issubclass(passed_class_obj[name].__class__, expected_class_obj):
                        raise ValueError(
                            f"{passed_class_obj[name]} is of type: {type(passed_class_obj[name])}, but should be"
                            f" {expected_class_obj}"
                        )
                elif passed_class_obj[name] is None:
                    logger.warning(
                        f"You have passed `None` for {name} to disable its functionality in {pipeline_class}. Note"
                        f" that this might lead to problems when using {pipeline_class} and is not recommended."
                    )
                    sub_model_should_be_defined = False
                else:
                    logger.warning(
                        f"You have passed a non-standard module {passed_class_obj[name]}. We cannot verify whether it"
                        " has the correct type"
                    )

                # set passed class object
                loaded_sub_model = passed_class_obj[name]
            elif is_pipeline_module:
                pipeline_module = getattr(pipelines, library_name)
                class_obj = getattr(pipeline_module, class_name)
                importable_classes = ALL_IMPORTABLE_CLASSES
                class_candidates = {c: class_obj for c in importable_classes.keys()}
            else:
                # else we just import it from the library.
                library = importlib.import_module(library_name)
                class_obj = getattr(library, class_name)
                importable_classes = LOADABLE_CLASSES[library_name]
                class_candidates = {c: getattr(library, c) for c in importable_classes.keys()}

            if loaded_sub_model is None and sub_model_should_be_defined:
                load_method_name = None
                for class_name, class_candidate in class_candidates.items():
                    if issubclass(class_obj, class_candidate):
                        load_method_name = importable_classes[class_name][1]

                if load_method_name is None:
                    none_module = class_obj.__module__
                    if none_module.startswith(DUMMY_MODULES_FOLDER) and "dummy" in none_module:
                        # call class_obj for nice error message of missing requirements
                        class_obj()

                    raise ValueError(
                        f"The component {class_obj} of {pipeline_class} cannot be loaded as it does not seem to have"
                        f" any of the loading methods defined in {ALL_IMPORTABLE_CLASSES}."
                    )

                load_method = getattr(class_obj, load_method_name)
                loading_kwargs = {}

                if issubclass(class_obj, torch.nn.Module):
                    loading_kwargs["torch_dtype"] = torch_dtype
                if issubclass(class_obj, diffusers.OnnxRuntimeModel):
                    loading_kwargs["provider"] = provider
                    loading_kwargs["sess_options"] = sess_options

                is_diffusers_model = issubclass(class_obj, diffusers.ModelMixin)
                is_transformers_model = (
                    is_transformers_available()
                    and issubclass(class_obj, PreTrainedModel)
                    and version.parse(version.parse(transformers.__version__).base_version) >= version.parse("4.20.0")
                )

                # Unlike diffusers models, transformers models eagerly initialize their weights when `device_map` is
                # None. To make default loading faster we set the `low_cpu_mem_usage=low_cpu_mem_usage` flag, which is
                # `True` by default. This makes sure that the weights won't be initialized, which significantly
                # speeds up loading.
                if is_diffusers_model or is_transformers_model:
                    loading_kwargs["device_map"] = device_map
                    loading_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage

                # check if the module is in a subdirectory
                if os.path.isdir(os.path.join(cached_folder, name)):
                    loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs)
                else:
                    # else load from the root directory
                    loaded_sub_model = load_method(cached_folder, **loading_kwargs)

            init_kwargs[name] = loaded_sub_model  # UNet(...), # DiffusionSchedule(...)

        # 4. Potentially add passed objects if expected
        missing_modules = set(expected_modules) - set(init_kwargs.keys())
        if len(missing_modules) > 0 and missing_modules <= set(passed_class_obj.keys()):
            for module in missing_modules:
                init_kwargs[module] = passed_class_obj[module]
        elif len(missing_modules) > 0:
            passed_modules = set(list(init_kwargs.keys()) + list(passed_class_obj.keys()))
            raise ValueError(
                f"Pipeline {pipeline_class} expected {expected_modules}, but only {passed_modules} were passed."
            )

        # 5. Instantiate the pipeline
        model = pipeline_class(**init_kwargs)
        return model

    @property
    def components(self) -> Dict[str, Any]:
        r"""
        The `self.components` property can be useful to run different pipelines with the same weights and
        configurations to not have to re-allocate memory.

        Examples:

        ```py
        >>> from diffusers import (
        ...     StableDiffusionPipeline,
        ...     StableDiffusionImg2ImgPipeline,
        ...     StableDiffusionInpaintPipeline,
        ... )

        >>> text2img = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
        >>> img2img = StableDiffusionImg2ImgPipeline(**text2img.components)
        >>> inpaint = StableDiffusionInpaintPipeline(**text2img.components)
        ```

        Returns:
            A dictionary containing all the modules needed to initialize the pipeline.
        """
        components = {k: getattr(self, k) for k in self.config.keys() if not k.startswith("_")}
        expected_modules = set(inspect.signature(self.__init__).parameters.keys()) - set(["self"])

        if set(components.keys()) != expected_modules:
            raise ValueError(
                f"{self} has been incorrectly initialized or {self.__class__} is incorrectly implemented. Expected"
                f" {expected_modules} to be defined, but {components} are defined."
            )

        return components

    @staticmethod
    def numpy_to_pil(images):
        """
        Convert a numpy image or a batch of images to a PIL image.
        """
        if images.ndim == 3:
            images = images[None, ...]
        images = (images * 255).round().astype("uint8")
        if images.shape[-1] == 1:
            # special case for grayscale (single channel) images
            pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
        else:
            pil_images = [Image.fromarray(image) for image in images]

        return pil_images

    def progress_bar(self, iterable):
        if not hasattr(self, "_progress_bar_config"):
            self._progress_bar_config = {}
        elif not isinstance(self._progress_bar_config, dict):
            raise ValueError(
                f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}."
            )

        return tqdm(iterable, **self._progress_bar_config)

    def set_progress_bar_config(self, **kwargs):
        self._progress_bar_config = kwargs
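        # Usage sketch (assumes `pipe` is a pipeline instance): the kwargs are
        # forwarded verbatim to `tqdm`, so e.g. `pipe.set_progress_bar_config(disable=True)`
        # silences the per-step bar created by `progress_bar` above.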


class LDMZhTextToImagePipeline(DiffusionPipeline):
    r"""
    Latent diffusion text-to-image pipeline for Chinese prompts, consisting of a VQ-VAE decoder (`vqvae`), a text
    encoder (`bert`) with its `tokenizer`, a diffusion `unet`, a `scheduler`, and an optional super-resolution model
    (`sr`).
    """

    def __init__(
        self,
        vqvae,
        bert,
        tokenizer,
        unet,
        scheduler,
        sr,
    ):
        super().__init__()
        self.register_modules(vqvae=vqvae, bert=bert, tokenizer=tokenizer, unet=unet, scheduler=scheduler, sr=sr)

    @torch.no_grad()
    def __call__(
        self,
        prompt: Union[str, List[str]],
        height: Optional[int] = 256,
        width: Optional[int] = 256,
        num_inference_steps: Optional[int] = 50,
        guidance_scale: Optional[float] = 5.0,
        eta: Optional[float] = 0.0,
        generator: Optional[torch.Generator] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        use_sr: bool = False,
        **kwargs,
    ):
r""" | |
Args: | |
prompt (`str` or `List[str]`): | |
The prompt or prompts to guide the image generation. | |
height (`int`, *optional*, defaults to 256): | |
The height in pixels of the generated image. | |
width (`int`, *optional*, defaults to 256): | |
The width in pixels of the generated image. | |
num_inference_steps (`int`, *optional*, defaults to 50): | |
The number of denoising steps. More denoising steps usually lead to a higher quality image at the | |
expense of slower inference. | |
guidance_scale (`float`, *optional*, defaults to 1.0): | |
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). | |
`guidance_scale` is defined as `w` of equation 2. of [Imagen | |
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > | |
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt` at | |
the, usually at the expense of lower image quality. | |
generator (`torch.Generator`, *optional*): | |
A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation | |
deterministic. | |
output_type (`str`, *optional*, defaults to `"pil"`): | |
The output format of the generate image. Choose between | |
[PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. | |
return_dict (`bool`, *optional*): | |
Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. | |
Returns: | |
[`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if | |
`return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the | |
generated images. | |
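
        Example (a minimal sketch; the repo id below is a placeholder for a real LdmZh checkpoint):

        ```py
        >>> pipe = LDMZhTextToImagePipeline.from_pretrained("<ldm-zh-repo-id>")  # hypothetical repo id
        >>> pipe = pipe.to("cuda")
        >>> image = pipe("一只戴帽子的猫", num_inference_steps=50, guidance_scale=5.0).images[0]  # "a cat wearing a hat"
        >>> image.save("cat_with_hat.png")
        ```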
""" | |
        if isinstance(prompt, str):
            batch_size = 1
        elif isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

        if height % 8 != 0 or width % 8 != 0:
            raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

        # get unconditional embeddings for classifier free guidance
        if guidance_scale != 1.0:
            uncond_input = self.tokenizer([""] * batch_size, padding="max_length", max_length=32, return_tensors="pt")
            uncond_embeddings = self.bert(uncond_input.input_ids.to(self.device))

        # get prompt text embeddings
        text_input = self.tokenizer(prompt, padding="max_length", max_length=32, return_tensors="pt")
        text_embeddings = self.bert(text_input.input_ids.to(self.device))

        latents = torch.randn(
            (batch_size, self.unet.in_channels, height // 8, width // 8),
            generator=generator,
        )
        latents = latents.to(self.device)

        self.scheduler.set_timesteps(num_inference_steps)

        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
        extra_kwargs = {}
        if accepts_eta:
            extra_kwargs["eta"] = eta

        for t in self.progress_bar(self.scheduler.timesteps):
            if guidance_scale == 1.0:
                # guidance_scale of 1 means no guidance
                latents_input = latents
                context = text_embeddings
            else:
                # For classifier free guidance, we need to do two forward passes.
                # Here we concatenate the unconditional and text embeddings into a single batch
                # to avoid doing two forward passes
                latents_input = torch.cat([latents] * 2)
                context = torch.cat([uncond_embeddings, text_embeddings])

            # predict the noise residual
            noise_pred = self.unet(latents_input, t, encoder_hidden_states=context).sample
            # perform guidance
            if guidance_scale != 1.0:
                noise_pred_uncond, noise_prediction_text = noise_pred.chunk(2)
                noise_pred = noise_pred_uncond + guidance_scale * (noise_prediction_text - noise_pred_uncond)

            # compute the previous noisy sample x_t -> x_t-1
            latents = self.scheduler.step(noise_pred, t, latents, **extra_kwargs).prev_sample

        # scale and decode the image latents with the VQ-VAE
        latents = 1 / 0.18215 * latents
        image = self.vqvae.decode(latents).sample

        image = (image / 2 + 0.5).clamp(0, 1)
        if use_sr:
            image = self.sr(image)
        image = image.cpu().permute(0, 2, 3, 1).numpy()
        if output_type == "pil":
            image = self.numpy_to_pil(image)

        if not return_dict:
            return (image,)

        return ImagePipelineOutput(images=image)
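

# `QuickGELU` is the sigmoid-based approximation of the GELU activation used in
# OpenAI's CLIP codebase: GELU(x) ≈ x * sigmoid(1.702 * x).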
class QuickGELU(nn.Module):
    def forward(self, x: torch.Tensor):
        return x * torch.sigmoid(1.702 * x)


class ResidualAttentionBlock(nn.Module):
    def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None):
        super().__init__()
        self.attn = nn.MultiheadAttention(d_model, n_head)
        self.ln_1 = nn.LayerNorm(d_model, eps=1e-07)
        self.mlp = nn.Sequential(
            OrderedDict(
                [
                    ("c_fc", nn.Linear(d_model, d_model * 4)),
                    ("gelu", QuickGELU()),
                    ("c_proj", nn.Linear(d_model * 4, d_model)),
                ]
            )
        )
        self.ln_2 = nn.LayerNorm(d_model, eps=1e-07)
        self.attn_mask = attn_mask

    def attention(self, x: torch.Tensor):
        self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None
        return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0]

    def forward(self, x: torch.Tensor):
        x = x + self.attention(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x


class Transformer(nn.Module):
    def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None):
        super().__init__()
        self.width = width
        self.layers = layers
        self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)])

    def forward(self, x: torch.Tensor):
        return self.resblocks(x)


class TextTransformer(nn.Module):
    def __init__(
        self,
        context_length=32,
        vocab_size=21128,
        output_dim=768,
        width=768,
        layers=12,
        heads=12,
        return_full_embed=False,
    ):
        super(TextTransformer, self).__init__()
        self.width = width
        self.layers = layers
        self.vocab_size = vocab_size
        self.return_full_embed = return_full_embed

        self.transformer = Transformer(width, layers, heads, self.build_attention_mask(context_length))
        self.text_projection = torch.nn.Parameter(
            torch.tensor(np.random.normal(0, self.width**-0.5, size=(self.width, output_dim)).astype(np.float32))
        )
        self.ln_final = nn.LayerNorm(width, eps=1e-07)

        # https://discuss.pytorch.org/t/implementing-truncated-normal-initializer/4778/27
        # https://github.com/pytorch/pytorch/blob/a40812de534b42fcf0eb57a5cecbfdc7a70100cf/torch/nn/init.py#L22
        self.embedding_table = nn.Parameter(nn.init.trunc_normal_(torch.empty(vocab_size, width), std=0.02))
        # self.embedding_table = nn.Embedding.from_pretrained(nn.init.trunc_normal_(torch.empty(vocab_size, width), std=0.02))
        self.positional_embedding = nn.Parameter(nn.init.trunc_normal_(torch.empty(context_length, width), std=0.01))
        # self.positional_embedding = nn.Embedding.from_pretrained(nn.init.trunc_normal_(torch.empty(context_length, width), std=0.01))

        self.index_select = torch.index_select
        self.reshape = torch.reshape

    @staticmethod
    def build_attention_mask(context_length):
        # causal mask: entries above the diagonal are -inf, so each token can only
        # attend to itself and earlier positions
        mask = np.triu(np.full((context_length, context_length), -np.inf).astype(np.float32), 1)
        mask = torch.tensor(mask)
        return mask

    def forward(self, x: torch.Tensor):
        # positions of the [SEP] token (id 102 in the BERT vocabulary), used to pool
        # the final hidden state of each sequence
        tail_token = (x == 102).nonzero(as_tuple=True)
        bsz, ctx_len = x.shape
        flatten_id = x.flatten()
        index_select_result = self.index_select(self.embedding_table, 0, flatten_id)
        x = self.reshape(index_select_result, (bsz, ctx_len, -1))
        x = x + self.positional_embedding
        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = self.ln_final(x)
        x = x[tail_token]
        x = x @ self.text_projection
        return x


class WukongClipTextEncoder(ModelMixin, ConfigMixin):
    @register_to_config
    def __init__(self):
        super().__init__()
        self.model = TextTransformer()

    def forward(self, tokens):
        z = self.model(tokens)
        # L2-normalize the pooled embedding
        z = z / torch.linalg.norm(z, dim=-1, keepdim=True)
        if z.ndim == 2:
            z = z.view((z.shape[0], 1, z.shape[1]))
        return z


def make_layer(block, n_layers):
    layers = []
    for _ in range(n_layers):
        layers.append(block())
    return nn.Sequential(*layers)


class ResidualDenseBlock_5C(nn.Module):
    def __init__(self, nf=64, gc=32, bias=True):
        super(ResidualDenseBlock_5C, self).__init__()
        # gc: growth channel, i.e. intermediate channels
        self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
        self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

        # initialization
        # mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)

    def forward(self, x):
        x1 = self.lrelu(self.conv1(x))
        x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
        x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
        x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
        # residual scaling by 0.2, as in ESRGAN
        return x5 * 0.2 + x


class RRDB(nn.Module):
    """Residual in Residual Dense Block"""

    def __init__(self, nf, gc=32):
        super(RRDB, self).__init__()
        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
        self.RDB3 = ResidualDenseBlock_5C(nf, gc)

    def forward(self, x):
        out = self.RDB1(x)
        out = self.RDB2(out)
        out = self.RDB3(out)
        return out * 0.2 + x


class RRDBNet(nn.Module):
    def __init__(self, in_nc, out_nc, nf, nb, gc=32):
        super(RRDBNet, self).__init__()
        RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)

        self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
        self.RRDB_trunk = make_layer(RRDB_block_f, nb)
        self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        #### upsampling
        self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        fea = self.conv_first(x)
        trunk = self.trunk_conv(self.RRDB_trunk(fea))
        fea = fea + trunk

        # two nearest-neighbour upsampling stages -> 4x super-resolution
        fea = self.lrelu(self.upconv1(torch.nn.functional.interpolate(fea, scale_factor=2, mode="nearest")))
        fea = self.lrelu(self.upconv2(torch.nn.functional.interpolate(fea, scale_factor=2, mode="nearest")))
        out = self.conv_last(self.lrelu(self.HRconv(fea)))

        return out


class ESRGAN(ModelMixin, ConfigMixin):
    @register_to_config
    def __init__(self):
        super().__init__()
        self.model = RRDBNet(3, 3, 64, 23, gc=32)

    def forward(self, img_LR):
        # swap RGB -> BGR; the RRDBNet weights expect OpenCV channel order
        img_LR = img_LR[:, [2, 1, 0], :, :]
        img_LR = img_LR.to(self.device)
        with torch.no_grad():
            output = self.model(img_LR)
        output = output.data.float().clamp_(0, 1)
        # swap back BGR -> RGB
        output = output[:, [2, 1, 0], :, :]
        return output