Upload 43 files
Browse files — v1.1 safetensor version
- 1.1/README.md +6 -0
- 1.1/control_v11e_sd15_ip2p.fp16.safetensors +3 -0
- 1.1/control_v11e_sd15_ip2p.safetensors +3 -0
- 1.1/control_v11e_sd15_ip2p.yaml +79 -0
- 1.1/control_v11e_sd15_shuffle.fp16.safetensors +3 -0
- 1.1/control_v11e_sd15_shuffle.safetensors +3 -0
- 1.1/control_v11e_sd15_shuffle.yaml +80 -0
- 1.1/control_v11f1e_sd15_tile.fp16.safetensors +3 -0
- 1.1/control_v11f1e_sd15_tile.safetensors +3 -0
- 1.1/control_v11f1e_sd15_tile.yaml +79 -0
- 1.1/control_v11f1p_sd15_depth.yaml +79 -0
- 1.1/control_v11p_sd15_canny.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_canny.safetensors +3 -0
- 1.1/control_v11p_sd15_canny.yaml +79 -0
- 1.1/control_v11p_sd15_depth..safetensors +3 -0
- 1.1/control_v11p_sd15_depth.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_inpaint.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_inpaint.safetensors +3 -0
- 1.1/control_v11p_sd15_inpaint.yaml +79 -0
- 1.1/control_v11p_sd15_lineart.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_lineart.safetensors +3 -0
- 1.1/control_v11p_sd15_lineart.yaml +79 -0
- 1.1/control_v11p_sd15_mlsd.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_mlsd.safetensors +3 -0
- 1.1/control_v11p_sd15_mlsd.yaml +79 -0
- 1.1/control_v11p_sd15_normalbae.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_normalbae.safetensors +3 -0
- 1.1/control_v11p_sd15_normalbae.yaml +79 -0
- 1.1/control_v11p_sd15_openpose.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_openpose.safetensors +3 -0
- 1.1/control_v11p_sd15_openpose.yaml +79 -0
- 1.1/control_v11p_sd15_scribble.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_scribble.safetensors +3 -0
- 1.1/control_v11p_sd15_scribble.yaml +79 -0
- 1.1/control_v11p_sd15_seg.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_seg.safetensors +3 -0
- 1.1/control_v11p_sd15_seg.yaml +79 -0
- 1.1/control_v11p_sd15_softedge.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15_softedge.safetensors +3 -0
- 1.1/control_v11p_sd15_softedge.yaml +79 -0
- 1.1/control_v11p_sd15s2_lineart_anime.fp16.safetensors +3 -0
- 1.1/control_v11p_sd15s2_lineart_anime.safetensors +3 -0
- 1.1/control_v11p_sd15s2_lineart_anime.yaml +79 -0
1.1/README.md
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: openrail
|
3 |
+
---
|
4 |
+
|
5 |
+
These are the model files for [ControlNet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly).
|
6 |
+
This model card will be filled in with more detail after 1.1 is officially merged into ControlNet.
|
1.1/control_v11e_sd15_ip2p.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01f214c8e6a0043b32004dade5bc40612d93ec5c468b09a26f97deba84b0fceb
|
3 |
+
size 722598642
|
1.1/control_v11e_sd15_ip2p.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8f35e0869d2160cc4c6841401c31dc18eb773f6b43ab74f08d987aff1e143a5
|
3 |
+
size 1445157124
|
1.1/control_v11e_sd15_ip2p.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11e_sd15_shuffle.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9a2f4a3da626652169cd478e51426c9a16f4a92ab4e2268889981046d8118ca
|
3 |
+
size 722598642
|
1.1/control_v11e_sd15_shuffle.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5f97dd719b0fe95f8235b8dbf39d4af70e44252c3a84bb672cadd48b0d55555
|
3 |
+
size 1445157124
|
1.1/control_v11e_sd15_shuffle.yaml
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
global_average_pooling: True
|
21 |
+
|
22 |
+
control_stage_config:
|
23 |
+
target: cldm.cldm.ControlNet
|
24 |
+
params:
|
25 |
+
image_size: 32 # unused
|
26 |
+
in_channels: 4
|
27 |
+
hint_channels: 3
|
28 |
+
model_channels: 320
|
29 |
+
attention_resolutions: [ 4, 2, 1 ]
|
30 |
+
num_res_blocks: 2
|
31 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
32 |
+
num_heads: 8
|
33 |
+
use_spatial_transformer: True
|
34 |
+
transformer_depth: 1
|
35 |
+
context_dim: 768
|
36 |
+
use_checkpoint: True
|
37 |
+
legacy: False
|
38 |
+
|
39 |
+
unet_config:
|
40 |
+
target: cldm.cldm.ControlledUnetModel
|
41 |
+
params:
|
42 |
+
image_size: 32 # unused
|
43 |
+
in_channels: 4
|
44 |
+
out_channels: 4
|
45 |
+
model_channels: 320
|
46 |
+
attention_resolutions: [ 4, 2, 1 ]
|
47 |
+
num_res_blocks: 2
|
48 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
49 |
+
num_heads: 8
|
50 |
+
use_spatial_transformer: True
|
51 |
+
transformer_depth: 1
|
52 |
+
context_dim: 768
|
53 |
+
use_checkpoint: True
|
54 |
+
legacy: False
|
55 |
+
|
56 |
+
first_stage_config:
|
57 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
58 |
+
params:
|
59 |
+
embed_dim: 4
|
60 |
+
monitor: val/rec_loss
|
61 |
+
ddconfig:
|
62 |
+
double_z: true
|
63 |
+
z_channels: 4
|
64 |
+
resolution: 256
|
65 |
+
in_channels: 3
|
66 |
+
out_ch: 3
|
67 |
+
ch: 128
|
68 |
+
ch_mult:
|
69 |
+
- 1
|
70 |
+
- 2
|
71 |
+
- 4
|
72 |
+
- 4
|
73 |
+
num_res_blocks: 2
|
74 |
+
attn_resolutions: []
|
75 |
+
dropout: 0.0
|
76 |
+
lossconfig:
|
77 |
+
target: torch.nn.Identity
|
78 |
+
|
79 |
+
cond_stage_config:
|
80 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11f1e_sd15_tile.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:830f6389ce968dbb99ac215d8f6009d09bd820c5369e49cdde2e88bbd6616711
|
3 |
+
size 722601704
|
1.1/control_v11f1e_sd15_tile.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09ddb29fd470e306d7ad29b636f27f4420e9265c8b63a633af7c7da0006a0de5
|
3 |
+
size 1445157120
|
1.1/control_v11f1e_sd15_tile.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11f1p_sd15_depth.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_canny.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ed3a7f834aca0d2b26f9c7f186b3ee29465de1d13e152459e8c9cbb25a0c0bc
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_canny.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be713fb941fc7c625f0c7d816b6a19115783a665f3049a8974f127e0c075d9a9
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_canny.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_depth..safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:999aca923ca5e19e70e6afc8d11073cc3c03553ca935b636bd5925df4a1c77d1
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_depth.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:838f3e848cb93231d41084d858d117416ab32907a5c8c80c421a131d101ba1ef
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_inpaint.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd6fa262ac0820cb24572a5f133249f867a5264307d693ecff23b1eaa4cccf7e
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_inpaint.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22be8eebe5a5699b3abe074296b3984167ea2a762ef60cab06a494248ceaf41b
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_inpaint.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_lineart.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de4f699573813c5ad07091fe1ad4097ddc957c22e313187caa885c82e3b6a85b
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_lineart.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9d6d0e5526dd21dfc503f9e42a93ff1f977aa52df3c14e8ac11085b518cb114
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_lineart.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_mlsd.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ffedf5319792a7ae36209529170bea7bcb76a3cdd2390d35609ccf670814c2e
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_mlsd.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acfafe4d73e85b06b6858afd3db4226f65acc60f7cb420fec723ca3372d1f01c
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_mlsd.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_normalbae.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66a52e9e9c0d371796175e35351a8d5794ce0cce7b7e72f31d9785df591110ed
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_normalbae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6c3772b35e5cb1869beca97a6ade6e8e5283310462297b10d129e25351983d7
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_normalbae.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_openpose.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b25b1125e870275550b2a7de289056cb3c236c01c293bd5ba883657b1c006e3e
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_openpose.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46b10abb28f3750aba7eea208e188539f7945d9256de9a248cbb9902f2276988
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_openpose.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_scribble.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46be9cc44d61ed0a0bed0b34746541c1b274e7194f2262dca7671048562bdfa1
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_scribble.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:387ade970396c191da0fd229d26bbb64f80b8b3c501a774d9838e54638deccb1
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_scribble.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_seg.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23809d298edb48bf97ce95c6b8c4a69a749e57bf3838d866fbdc45f8fa11f6f4
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_seg.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68291d493d0d75df67fe931c753c9b204cf3863509d5471b5eb64e5f0f2b4b38
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_seg.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15_softedge.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d85c42195534f6cb2c8c194630446d5fee754b0cfd1aef46c900bd6c38974b3
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15_softedge.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d1b47a80e58fb8be102efbb574068afde3f2a7d0d72d96f70fa67f1ea9f72a9
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15_softedge.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
1.1/control_v11p_sd15s2_lineart_anime.fp16.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9619372316fb8ade82353be0c2b7821fa1da60ae3842d8c99afacf1d45ff73f
|
3 |
+
size 722598642
|
1.1/control_v11p_sd15s2_lineart_anime.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d55de2de4cd8813b88e0fb603e52616c9e5ee6cb019e28f0f64ef370e363fff
|
3 |
+
size 1445157124
|
1.1/control_v11p_sd15s2_lineart_anime.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
target: cldm.cldm.ControlLDM
|
3 |
+
params:
|
4 |
+
linear_start: 0.00085
|
5 |
+
linear_end: 0.0120
|
6 |
+
num_timesteps_cond: 1
|
7 |
+
log_every_t: 200
|
8 |
+
timesteps: 1000
|
9 |
+
first_stage_key: "jpg"
|
10 |
+
cond_stage_key: "txt"
|
11 |
+
control_key: "hint"
|
12 |
+
image_size: 64
|
13 |
+
channels: 4
|
14 |
+
cond_stage_trainable: false
|
15 |
+
conditioning_key: crossattn
|
16 |
+
monitor: val/loss_simple_ema
|
17 |
+
scale_factor: 0.18215
|
18 |
+
use_ema: False
|
19 |
+
only_mid_control: False
|
20 |
+
|
21 |
+
control_stage_config:
|
22 |
+
target: cldm.cldm.ControlNet
|
23 |
+
params:
|
24 |
+
image_size: 32 # unused
|
25 |
+
in_channels: 4
|
26 |
+
hint_channels: 3
|
27 |
+
model_channels: 320
|
28 |
+
attention_resolutions: [ 4, 2, 1 ]
|
29 |
+
num_res_blocks: 2
|
30 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
31 |
+
num_heads: 8
|
32 |
+
use_spatial_transformer: True
|
33 |
+
transformer_depth: 1
|
34 |
+
context_dim: 768
|
35 |
+
use_checkpoint: True
|
36 |
+
legacy: False
|
37 |
+
|
38 |
+
unet_config:
|
39 |
+
target: cldm.cldm.ControlledUnetModel
|
40 |
+
params:
|
41 |
+
image_size: 32 # unused
|
42 |
+
in_channels: 4
|
43 |
+
out_channels: 4
|
44 |
+
model_channels: 320
|
45 |
+
attention_resolutions: [ 4, 2, 1 ]
|
46 |
+
num_res_blocks: 2
|
47 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
48 |
+
num_heads: 8
|
49 |
+
use_spatial_transformer: True
|
50 |
+
transformer_depth: 1
|
51 |
+
context_dim: 768
|
52 |
+
use_checkpoint: True
|
53 |
+
legacy: False
|
54 |
+
|
55 |
+
first_stage_config:
|
56 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
57 |
+
params:
|
58 |
+
embed_dim: 4
|
59 |
+
monitor: val/rec_loss
|
60 |
+
ddconfig:
|
61 |
+
double_z: true
|
62 |
+
z_channels: 4
|
63 |
+
resolution: 256
|
64 |
+
in_channels: 3
|
65 |
+
out_ch: 3
|
66 |
+
ch: 128
|
67 |
+
ch_mult:
|
68 |
+
- 1
|
69 |
+
- 2
|
70 |
+
- 4
|
71 |
+
- 4
|
72 |
+
num_res_blocks: 2
|
73 |
+
attn_resolutions: []
|
74 |
+
dropout: 0.0
|
75 |
+
lossconfig:
|
76 |
+
target: torch.nn.Identity
|
77 |
+
|
78 |
+
cond_stage_config:
|
79 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|