Fp16 version.

Files changed:
- README.md +17 -126
- model_index.json +1 -1
- safety_checker/config.json +7 -4
- safety_checker/pytorch_model.bin +2 -2
- scheduler/scheduler_config.json +2 -6
- text_encoder/config.json +3 -3
- text_encoder/pytorch_model.bin +2 -2
- tokenizer/tokenizer_config.json +1 -1
- unet/config.json +2 -1
- unet/diffusion_pytorch_model.bin +2 -2
- vae/config.json +2 -1
- vae/diffusion_pytorch_model.bin +2 -2
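This commit replaces every PyTorch weight file with a half-precision copy and trims the model card accordingly. Below is a minimal sketch of how such an fp16 export can be produced, assuming the `diffusers` `from_pretrained`/`save_pretrained` API used elsewhere in this card; it is not necessarily the exact procedure used for this commit:

```python
import torch
from diffusers import StableDiffusionPipeline

# Load the pipeline with all module weights cast to float16...
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16,
    use_auth_token=True,
)

# ...and re-serialize it; save_pretrained writes the per-component layout
# (unet/, vae/, text_encoder/, safety_checker/, ...) whose diffs appear below.
pipe.save_pretrained("./stable-diffusion-v1-4-fp16")
```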
README.md
CHANGED
@@ -6,16 +6,14 @@ tags:
 - stable-diffusion-diffusers
 - text-to-image
 extra_gated_prompt: |-
-  One more step before getting this model
-  This model is open access and available to all, …
-
-
+  One more step before getting this model
+  This model is open access and available to all, but it has the CreativeML OpenRAIL-M license you have to be aware of before using it - don't worry you are just one click away!
+  By clicking on "Access repository" below, you accept that your *contact information* (email address and username) can be shared with the model authors as well.
+  Summary of the CreativeML OpenRAIL License:
   1. You can't use the model to deliberately produce nor share illegal or harmful outputs or content
-  2. …
+  2. We claim no rights on the outputs you generate, you are free to use them and are accountable for their use which should not go against the provisions set in the license
   3. You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M to all your users (please read the license entirely and carefully)
   Please read the full license here: https://huggingface.co/spaces/CompVis/stable-diffusion-license
-
-  By clicking on "Access repository" below, you accept that your *contact information* (email address and username) can be shared with the model authors as well.
 
 extra_gated_fields:
   I have read the License and agree with its terms: checkbox
@@ -24,12 +22,10 @@ extra_gated_fields:
 # Stable Diffusion v1-4 Model Card
 
 Stable Diffusion is a latent text-to-image diffusion model capable of generating photo-realistic images given any text input.
-For more information about how Stable Diffusion functions, please have a look at [🤗's Stable Diffusion with 🧨 …
+For more information about how Stable Diffusion functions, please have a look at [🤗's Stable Diffusion with D🧨iffusers blog](https://huggingface.co/blog/stable_diffusion).
 
-The **Stable-Diffusion-v1-4** checkpoint was initialized with the weights of the [Stable-Diffusion-v1-…
-checkpoint and subsequently fine-tuned on …
-
-This weights here are intended to be used with the 🧨 Diffusers library. If you are looking for the weights to be loaded into the CompVis Stable Diffusion codebase, [come here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
+The **Stable-Diffusion-v1-4** checkpoint was initialized with the weights of the [Stable-Diffusion-v1-3](https://huggingface.co/CompVis/stable-diffusion-v1-3)
+checkpoint and subsequently fine-tuned on X steps on Y with Z.
 
 ## Model Details
 - **Developed by:** Robin Rombach, Patrick Esser
@@ -53,8 +49,6 @@ This weights here are intended to be used with the 🧨 Diffusers library. If yo
 
 We recommend using [🤗's Diffusers library](https://github.com/huggingface/diffusers) to run Stable Diffusion.
 
-### PyTorch
-
 ```bash
 pip install --upgrade diffusers transformers scipy
 ```
@@ -65,8 +59,7 @@ Run this command to log in with your HF Hub token if you haven't before:
 huggingface-cli login
 ```
 
-Running the pipeline with the default …
-
+Running the pipeline with the default PLMS scheduler:
 ```python
 import torch
 from torch import autocast
@@ -75,32 +68,15 @@ from diffusers import StableDiffusionPipeline
 model_id = "CompVis/stable-diffusion-v1-4"
 device = "cuda"
 
-
+generator = torch.Generator(device=device).manual_seed(0)
 pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
 pipe = pipe.to(device)
 
-prompt = "a photo of an astronaut riding a horse on mars"
+prompt = "a photograph of an astronaut riding a horse"
 with autocast("cuda"):
-    image = pipe(prompt, guidance_scale=7.5).images[0]
+    image = pipe(prompt, generator=generator)["sample"][0]  # image here is in PIL format
 
-image.save("astronaut_rides_horse.png")
-```
-
-**Note**:
-If you are limited by GPU memory and have less than 10GB of GPU RAM available, please make sure to load the StableDiffusionPipeline in float16 precision instead of the default float32 precision as done above. You can do so by telling diffusers to expect the weights to be in float16 precision:
-
-
-```py
-import torch
-
-pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16", use_auth_token=True)
-pipe = pipe.to(device)
-
-prompt = "a photo of an astronaut riding a horse on mars"
-with autocast("cuda"):
-    image = pipe(prompt, guidance_scale=7.5).images[0]
-
-image.save("astronaut_rides_horse.png")
+image.save(f"astronaut_rides_horse.png")
 ```
 
 To swap out the noise scheduler, pass it to `from_pretrained`:
@@ -113,81 +89,6 @@ model_id = "CompVis/stable-diffusion-v1-4"
 scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000)
 pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, use_auth_token=True)
 pipe = pipe.to("cuda")
-
-prompt = "a photo of an astronaut riding a horse on mars"
-with autocast("cuda"):
-    image = pipe(prompt, guidance_scale=7.5).images[0]
-
-image.save("astronaut_rides_horse.png")
-```
-
-### JAX/Flax
-
-To use StableDiffusion on TPUs and GPUs for faster inference you can leverage JAX/Flax.
-
-Running the pipeline with default PNDMScheduler
-
-```python
-import jax
-import numpy as np
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-
-from diffusers import FlaxStableDiffusionPipeline
-
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
-    "CompVis/stable-diffusion-v1-4", revision="flax", dtype=jax.numpy.bfloat16
-)
-
-prompt = "a photo of an astronaut riding a horse on mars"
-
-prng_seed = jax.random.PRNGKey(0)
-num_inference_steps = 50
-
-num_samples = jax.device_count()
-prompt = num_samples * [prompt]
-prompt_ids = pipeline.prepare_inputs(prompt)
-
-# shard inputs and rng
-params = replicate(params)
-prng_seed = jax.random.split(prng_seed, 8)
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images
-images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:])))
-```
-
-**Note**:
-If you are limited by TPU memory, please make sure to load the `FlaxStableDiffusionPipeline` in `bfloat16` precision instead of the default `float32` precision as done above. You can do so by telling diffusers to load the weights from "bf16" branch.
-
-```python
-import jax
-import numpy as np
-from flax.jax_utils import replicate
-from flax.training.common_utils import shard
-
-from diffusers import FlaxStableDiffusionPipeline
-
-pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
-    "CompVis/stable-diffusion-v1-4", revision="bf16", dtype=jax.numpy.bfloat16
-)
-
-prompt = "a photo of an astronaut riding a horse on mars"
-
-prng_seed = jax.random.PRNGKey(0)
-num_inference_steps = 50
-
-num_samples = jax.device_count()
-prompt = num_samples * [prompt]
-prompt_ids = pipeline.prepare_inputs(prompt)
-
-# shard inputs and rng
-params = replicate(params)
-prng_seed = jax.random.split(prng_seed, 8)
-prompt_ids = shard(prompt_ids)
-
-images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images
-images = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:])))
 ```
 
 # Uses
@@ -239,8 +140,6 @@ Using the model to generate content that is cruel to individuals is a misuse of
 [LAION-5B](https://laion.ai/blog/laion-5b/) which contains adult material
 and is not fit for product use without additional safety mechanisms and
 considerations.
-- No additional measures were used to deduplicate the dataset. As a result, we observe some degree of memorization for images that are duplicated in the training data.
-  The training data can be searched at [https://rom1504.github.io/clip-retrieval/](https://rom1504.github.io/clip-retrieval/) to possibly assist in the detection of memorized images.
 
 ### Bias
 
@@ -251,14 +150,6 @@ Texts and images from communities and cultures that use other languages are like
 This affects the overall output of the model, as white and western cultures are often set as the default. Further, the
 ability of the model to generate content with non-English prompts is significantly worse than with English-language prompts.
 
-### Safety Module
-
-The intended use of this model is with the [Safety Checker](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py) in Diffusers.
-This checker works by checking model outputs against known hard-coded NSFW concepts.
-The concepts are intentionally hidden to reduce the likelihood of reverse-engineering this filter.
-Specifically, the checker compares the class probability of harmful concepts in the embedding space of the `CLIPTextModel` *after generation* of the images.
-The concepts are passed into the model with the generated image and compared to a hand-engineered weight for each NSFW concept.
-
 
 ## Training
 
@@ -281,8 +172,8 @@ We currently provide four checkpoints, which were trained as follows.
 - [`stable-diffusion-v1-2`](https://huggingface.co/CompVis/stable-diffusion-v1-2): Resumed from `stable-diffusion-v1-1`.
   515,000 steps at resolution `512x512` on "laion-improved-aesthetics" (a subset of laion2B-en,
   filtered to images with an original size `>= 512x512`, estimated aesthetics score `> 5.0`, and an estimated watermark probability `< 0.5`. The watermark estimate is from the LAION-5B metadata, the aesthetics score is estimated using an [improved aesthetics estimator](https://github.com/christophschuhmann/improved-aesthetic-predictor)).
-- [`stable-diffusion-v1-3`](https://huggingface.co/CompVis/stable-diffusion-v1-3): Resumed from `stable-diffusion-v1-2`. 195,000 steps at resolution `512x512` on "laion-improved-aesthetics" and 10 % dropping of the text-conditioning to improve [classifier-free guidance sampling](https://arxiv.org/abs/2207.12598)
-- […
+- [`stable-diffusion-v1-3`](https://huggingface.co/CompVis/stable-diffusion-v1-3): Resumed from `stable-diffusion-v1-2`. 195,000 steps at resolution `512x512` on "laion-improved-aesthetics" and 10 % dropping of the text-conditioning to improve [classifier-free guidance sampling](https://arxiv.org/abs/2207.12598)
+- [**`stable-diffusion-v1-4`**](https://huggingface.co/CompVis/stable-diffusion-v1-4) *To-fill-here*
 
 - **Hardware:** 32 x 8 x A100 GPUs
 - **Optimizer:** AdamW
@@ -295,7 +186,7 @@ Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0,
 5.0, 6.0, 7.0, 8.0) and 50 PLMS sampling
 steps show the relative improvements of the checkpoints:
 
-![pareto](…
+![pareto](v1-variants-scores.jpg)
 
 Evaluated using 50 PLMS steps and 10000 random prompts from the COCO2017 validation set, evaluated at 512x512 resolution. Not optimized for FID scores.
 ## Environmental Impact
@@ -323,4 +214,4 @@ Based on that information, we estimate the following CO2 emissions using the [Ma
 }
 ```
 
-*This model card was written by: Robin Rombach and Patrick Esser and is based on the [DALL-E Mini model card](https://huggingface.co/dalle-mini/dalle-mini).*
+*This model card was written by: Robin Rombach and Patrick Esser and is based on the [DALL-E Mini model card](https://huggingface.co/dalle-mini/dalle-mini).*
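The hunks above drop the float16 **Note** from the card because this revision already ships half-precision weights. Per that removed note, consumers on other revisions load these weights by pinning `revision="fp16"`. A sketch combining the removed note with the `["sample"][0]` output access used in this card's updated example (the `.images[0]` accessor shown in the removed hunk belongs to later diffusers releases):

```python
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline

# Load this fp16 revision directly: halves download size and GPU memory,
# recommended below ~10GB of GPU RAM per the removed note.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16,
    revision="fp16",
    use_auth_token=True,
)
pipe = pipe.to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
with autocast("cuda"):
    image = pipe(prompt, guidance_scale=7.5)["sample"][0]

image.save("astronaut_rides_horse.png")
```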
model_index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.2.…
+  "_diffusers_version": "0.2.3",
   "feature_extractor": [
     "transformers",
     "CLIPFeatureExtractor"
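Only the pinned `_diffusers_version` changes here. `model_index.json` is the pipeline manifest: it names the pipeline class and, for each component, the (library, class) pair diffusers uses to reload that component from its subfolder. A small illustrative read, assuming the file layout shown in this commit:

```python
import json

# Inspect the manifest diffusers uses to re-assemble the pipeline
# from the per-component subfolders (unet/, vae/, text_encoder/, ...).
with open("model_index.json") as f:
    index = json.load(f)

print(index["_class_name"])        # -> "StableDiffusionPipeline"
print(index["feature_extractor"])  # -> ["transformers", "CLIPFeatureExtractor"]
```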
safety_checker/config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./…
+  "_name_or_path": "./safety_checker",
   "architectures": [
     "StableDiffusionSafetyChecker"
   ],
@@ -68,6 +68,7 @@
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
+  "tf_legacy_loss": false,
   "tie_encoder_decoder": false,
   "tie_word_embeddings": true,
   "tokenizer_class": null,
@@ -75,7 +76,7 @@
   "top_p": 1.0,
   "torch_dtype": null,
   "torchscript": false,
-  "transformers_version": "4.21.…
+  "transformers_version": "4.21.1",
   "typical_p": 1.0,
   "use_bfloat16": false,
   "vocab_size": 49408
@@ -86,7 +87,7 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 12
   },
-  "torch_dtype": "float32",
+  "torch_dtype": "float16",
   "transformers_version": null,
   "vision_config": {
     "_name_or_path": "",
@@ -133,6 +134,7 @@
   "num_attention_heads": 16,
   "num_beam_groups": 1,
   "num_beams": 1,
+  "num_channels": 3,
   "num_hidden_layers": 24,
   "num_return_sequences": 1,
   "output_attentions": false,
@@ -150,6 +152,7 @@
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
+  "tf_legacy_loss": false,
   "tie_encoder_decoder": false,
   "tie_word_embeddings": true,
   "tokenizer_class": null,
@@ -157,7 +160,7 @@
   "top_p": 1.0,
   "torch_dtype": null,
   "torchscript": false,
-  "transformers_version": "4.21.…
+  "transformers_version": "4.21.1",
   "typical_p": 1.0,
   "use_bfloat16": false
   },
safety_checker/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:1d37ca6e57ace94e4c2f03ed0f67b6dc83e1ef1160892074917aa68b28e2afc1
+size 608098599
scheduler/scheduler_config.json
CHANGED
@@ -1,13 +1,9 @@
 {
   "_class_name": "PNDMScheduler",
-  "_diffusers_version": "0.…
+  "_diffusers_version": "0.2.3",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
   "num_train_timesteps": 1000,
-  "set_alpha_to_one": false,
-  "skip_prk_steps": true,
-  "steps_offset": 1,
-  "trained_betas": null,
-  "clip_sample": false
+  "skip_prk_steps": true
 }
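The fp16 config keeps only the keys this pinned diffusers version understands; with `skip_prk_steps` set to true, `PNDMScheduler` performs the PLMS sampling the README's evaluation section refers to. A sketch instantiating the scheduler this config describes, assuming `PNDMScheduler`'s keyword arguments match the keys above (they do in the 0.2.x API used elsewhere in this commit):

```python
from diffusers import PNDMScheduler

# Equivalent of this scheduler_config.json: PNDM with the Runge-Kutta
# warm-up steps skipped, i.e. plain PLMS sampling.
scheduler = PNDMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    skip_prk_steps=True,
)
```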
text_encoder/config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "…
+  "_name_or_path": "./text_encoder",
   "architectures": [
     "CLIPTextModel"
   ],
@@ -18,7 +18,7 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 1,
-  "torch_dtype": "float32",
-  "transformers_version": "4.21.…
+  "torch_dtype": "float16",
+  "transformers_version": "4.21.1",
   "vocab_size": 49408
 }
text_encoder/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:88bd85efb0f84e70521633f578715afb2873db4f2615fdfb1f66e99934715865
+size 246184375
tokenizer/tokenizer_config.json
CHANGED
@@ -19,7 +19,7 @@
   },
   "errors": "replace",
   "model_max_length": 77,
-  "name_or_path": "…
+  "name_or_path": "./tokenizer",
   "pad_token": "<|endoftext|>",
   "special_tokens_map_file": "./special_tokens_map.json",
   "tokenizer_class": "CLIPTokenizer",
unet/config.json
CHANGED
@@ -1,6 +1,7 @@
 {
   "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.2.…
+  "_diffusers_version": "0.2.3",
+  "_name_or_path": "./unet",
   "act_fn": "silu",
   "attention_head_dim": 8,
   "block_out_channels": [
unet/diffusion_pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:d98edd280d5e040ee77f5802b8e3be3513de757335d1dedc4e495647e7c2d573
+size 1719312805
vae/config.json
CHANGED
@@ -1,6 +1,7 @@
 {
   "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.2.…
+  "_diffusers_version": "0.2.3",
+  "_name_or_path": "./vae",
   "act_fn": "silu",
   "block_out_channels": [
     128,
vae/diffusion_pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:51c8904bc921e1e6f354b5fa8e99a1c82ead2f0540114de21557b8abfbb24ad0
+size 167399505
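Each `*.bin` entry above is a Git LFS pointer: `oid` is the SHA-256 of the actual weight file and `size` is its byte count. As expected at two bytes per parameter, the fp16 copies are roughly half the fp32 sizes; the UNet's 1,719,312,805 bytes correspond to roughly 860M parameters. A sketch for verifying a downloaded file against its pointer, using the unet values above:

```python
import hashlib
import os

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against its Git LFS pointer (size + SHA-256)."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # hash in 1 MiB chunks to keep memory use flat on multi-GB files
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

print(matches_lfs_pointer(
    "unet/diffusion_pytorch_model.bin",
    "d98edd280d5e040ee77f5802b8e3be3513de757335d1dedc4e495647e7c2d573",
    1719312805,
))
```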