kmpartner committed on
Commit
7298e92
1 Parent(s): 0158cc1

Upload folder using huggingface_hub

Browse files
Files changed (25) hide show
  1. controlnet/config.json +10 -10
  2. controlnet/diffusion_pytorch_model.safetensors +2 -2
  3. logs/text2image-fine-tune/1717891836.1011167/events.out.tfevents.1717891836.a8343c8809c1.14061.1 +3 -0
  4. logs/text2image-fine-tune/1717891836.1036482/hparams.yml +47 -0
  5. logs/text2image-fine-tune/1717891979.0476391/events.out.tfevents.1717891979.a8343c8809c1.14739.1 +3 -0
  6. logs/text2image-fine-tune/1717891979.0580502/hparams.yml +47 -0
  7. logs/text2image-fine-tune/1717893073.4145217/events.out.tfevents.1717893073.a8343c8809c1.18650.1 +3 -0
  8. logs/text2image-fine-tune/1717893073.4179482/hparams.yml +47 -0
  9. logs/text2image-fine-tune/1717893880.4456894/events.out.tfevents.1717893880.a8343c8809c1.22493.1 +3 -0
  10. logs/text2image-fine-tune/1717893880.4486566/hparams.yml +47 -0
  11. logs/text2image-fine-tune/1717895383.2177856/events.out.tfevents.1717895383.a8343c8809c1.28476.1 +3 -0
  12. logs/text2image-fine-tune/1717895383.2211077/hparams.yml +47 -0
  13. logs/text2image-fine-tune/events.out.tfevents.1717891826.a8343c8809c1.14061.0 +3 -0
  14. logs/text2image-fine-tune/events.out.tfevents.1717891969.a8343c8809c1.14739.0 +3 -0
  15. logs/text2image-fine-tune/events.out.tfevents.1717892979.a8343c8809c1.18650.0 +3 -0
  16. logs/text2image-fine-tune/events.out.tfevents.1717893862.a8343c8809c1.22493.0 +3 -0
  17. logs/text2image-fine-tune/events.out.tfevents.1717895332.a8343c8809c1.28476.0 +3 -0
  18. model_index.json +4 -4
  19. text_encoder/config.json +6 -6
  20. text_encoder/model.safetensors +2 -2
  21. tokenizer/special_tokens_map.json +1 -1
  22. unet/config.json +8 -7
  23. unet/diffusion_pytorch_model.safetensors +2 -2
  24. vae/config.json +2 -2
  25. vae/diffusion_pytorch_model.safetensors +1 -1
controlnet/config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_class_name": "ControlNetModel",
3
- "_diffusers_version": "0.28.2",
4
- "_name_or_path": "lllyasviel/control_v11p_sd15_scribble",
5
  "act_fn": "silu",
6
  "addition_embed_type": null,
7
  "addition_embed_type_num_heads": 64,
@@ -10,7 +9,6 @@
10
  "block_out_channels": [
11
  320,
12
  640,
13
- 1280,
14
  1280
15
  ],
16
  "class_embed_type": null,
@@ -24,10 +22,9 @@
24
  "controlnet_conditioning_channel_order": "rgb",
25
  "cross_attention_dim": 768,
26
  "down_block_types": [
 
27
  "CrossAttnDownBlock2D",
28
- "CrossAttnDownBlock2D",
29
- "CrossAttnDownBlock2D",
30
- "DownBlock2D"
31
  ],
32
  "downsample_padding": 1,
33
  "encoder_hid_dim": null,
@@ -36,17 +33,20 @@
36
  "freq_shift": 0,
37
  "global_pool_conditions": false,
38
  "in_channels": 4,
39
- "layers_per_block": 2,
40
  "mid_block_scale_factor": 1,
41
- "mid_block_type": "UNetMidBlock2DCrossAttn",
42
  "norm_eps": 1e-05,
43
  "norm_num_groups": 32,
44
  "num_attention_heads": null,
45
  "num_class_embeds": null,
46
- "only_cross_attention": false,
 
 
 
 
47
  "projection_class_embeddings_input_dim": null,
48
  "resnet_time_scale_shift": "default",
49
  "transformer_layers_per_block": 1,
50
  "upcast_attention": false,
51
  "use_linear_projection": false
52
- }
 
1
  {
2
  "_class_name": "ControlNetModel",
3
+ "_diffusers_version": "0.23.1",
 
4
  "act_fn": "silu",
5
  "addition_embed_type": null,
6
  "addition_embed_type_num_heads": 64,
 
9
  "block_out_channels": [
10
  320,
11
  640,
 
12
  1280
13
  ],
14
  "class_embed_type": null,
 
22
  "controlnet_conditioning_channel_order": "rgb",
23
  "cross_attention_dim": 768,
24
  "down_block_types": [
25
+ "DownBlock2D",
26
  "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D"
 
 
28
  ],
29
  "downsample_padding": 1,
30
  "encoder_hid_dim": null,
 
33
  "freq_shift": 0,
34
  "global_pool_conditions": false,
35
  "in_channels": 4,
36
+ "layers_per_block": 1,
37
  "mid_block_scale_factor": 1,
 
38
  "norm_eps": 1e-05,
39
  "norm_num_groups": 32,
40
  "num_attention_heads": null,
41
  "num_class_embeds": null,
42
+ "only_cross_attention": [
43
+ true,
44
+ false,
45
+ false
46
+ ],
47
  "projection_class_embeddings_input_dim": null,
48
  "resnet_time_scale_shift": "default",
49
  "transformer_layers_per_block": 1,
50
  "upcast_attention": false,
51
  "use_linear_projection": false
52
+ }
controlnet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8199ce735834688ef40e758d7e7086b69361da4bb41dbcbbdd90b0172fcae69a
3
- size 722598648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d005da74cf9e639b3b9d6a49ff845690fd2081fead531c714c98d2694e490c96
3
+ size 728496840
logs/text2image-fine-tune/1717891836.1011167/events.out.tfevents.1717891836.a8343c8809c1.14061.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a8e64e850b157d87f33faa2a9b2af0def9995035385e707fc97517cc1cf456c
3
+ size 2457
logs/text2image-fine-tune/1717891836.1036482/hparams.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ center_crop: true
8
+ checkpointing_steps: 5000
9
+ checkpoints_total_limit: null
10
+ dataloader_num_workers: 0
11
+ dataset_config_name: null
12
+ enable_xformers_memory_efficient_attention: false
13
+ gradient_accumulation_steps: 4
14
+ gradient_checkpointing: true
15
+ lambda_kd_feat: 1.0
16
+ lambda_kd_output: 1.0
17
+ lambda_sd: 1.0
18
+ learning_rate: 5.0e-05
19
+ local_rank: -1
20
+ logging_dir: logs
21
+ lr_scheduler: constant
22
+ lr_warmup_steps: 0
23
+ max_grad_norm: 1.0
24
+ max_train_samples: null
25
+ max_train_steps: 400000
26
+ mixed_precision: fp16
27
+ non_ema_revision: null
28
+ num_train_epochs: 4939
29
+ num_valid_images: 2
30
+ output_dir: ./results/v2-base_kd_bk_tiny
31
+ pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-base
32
+ random_flip: true
33
+ report_to: all
34
+ resolution: 512
35
+ resume_from_checkpoint: null
36
+ revision: null
37
+ scale_lr: false
38
+ seed: 1234
39
+ train_batch_size: 32
40
+ train_data_dir: ./data/laion_aes/preprocessed_11k
41
+ unet_config_name: bk_tiny
42
+ unet_config_path: ./src/unet_config_v2-base
43
+ use_8bit_adam: false
44
+ use_copy_weight_from_teacher: true
45
+ use_ema: true
46
+ valid_prompt: a golden vase with different flowers
47
+ valid_steps: 500
logs/text2image-fine-tune/1717891979.0476391/events.out.tfevents.1717891979.a8343c8809c1.14739.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76614d8dbc8489000da018488807c6f040af5051fb5119365dad25c8e17894a7
3
+ size 2457
logs/text2image-fine-tune/1717891979.0580502/hparams.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ center_crop: true
8
+ checkpointing_steps: 5000
9
+ checkpoints_total_limit: null
10
+ dataloader_num_workers: 0
11
+ dataset_config_name: null
12
+ enable_xformers_memory_efficient_attention: false
13
+ gradient_accumulation_steps: 4
14
+ gradient_checkpointing: true
15
+ lambda_kd_feat: 1.0
16
+ lambda_kd_output: 1.0
17
+ lambda_sd: 1.0
18
+ learning_rate: 5.0e-05
19
+ local_rank: -1
20
+ logging_dir: logs
21
+ lr_scheduler: constant
22
+ lr_warmup_steps: 0
23
+ max_grad_norm: 1.0
24
+ max_train_samples: null
25
+ max_train_steps: 0
26
+ mixed_precision: fp16
27
+ non_ema_revision: null
28
+ num_train_epochs: 0
29
+ num_valid_images: 2
30
+ output_dir: ./results/v2-base_kd_bk_tiny
31
+ pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-base
32
+ random_flip: true
33
+ report_to: all
34
+ resolution: 512
35
+ resume_from_checkpoint: null
36
+ revision: null
37
+ scale_lr: false
38
+ seed: 1234
39
+ train_batch_size: 32
40
+ train_data_dir: ./data/laion_aes/preprocessed_11k
41
+ unet_config_name: bk_tiny
42
+ unet_config_path: ./src/unet_config_v2-base
43
+ use_8bit_adam: false
44
+ use_copy_weight_from_teacher: true
45
+ use_ema: true
46
+ valid_prompt: a golden vase with different flowers
47
+ valid_steps: 500
logs/text2image-fine-tune/1717893073.4145217/events.out.tfevents.1717893073.a8343c8809c1.18650.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0125e74a77defb57f223e6550fd2d98a7f5cbe4b98ebf1bd191ae3def7cd417
3
+ size 2457
logs/text2image-fine-tune/1717893073.4179482/hparams.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ center_crop: true
8
+ checkpointing_steps: 5000
9
+ checkpoints_total_limit: null
10
+ dataloader_num_workers: 0
11
+ dataset_config_name: null
12
+ enable_xformers_memory_efficient_attention: false
13
+ gradient_accumulation_steps: 4
14
+ gradient_checkpointing: true
15
+ lambda_kd_feat: 1.0
16
+ lambda_kd_output: 1.0
17
+ lambda_sd: 1.0
18
+ learning_rate: 5.0e-05
19
+ local_rank: -1
20
+ logging_dir: logs
21
+ lr_scheduler: constant
22
+ lr_warmup_steps: 0
23
+ max_grad_norm: 1.0
24
+ max_train_samples: null
25
+ max_train_steps: 0
26
+ mixed_precision: fp16
27
+ non_ema_revision: null
28
+ num_train_epochs: 0
29
+ num_valid_images: 2
30
+ output_dir: ./results/v2-base_kd_bk_tiny
31
+ pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-base
32
+ random_flip: true
33
+ report_to: all
34
+ resolution: 512
35
+ resume_from_checkpoint: null
36
+ revision: null
37
+ scale_lr: false
38
+ seed: 1234
39
+ train_batch_size: 32
40
+ train_data_dir: ./data/laion_aes/preprocessed_11k
41
+ unet_config_name: bk_tiny
42
+ unet_config_path: ./src/unet_config_v2-base
43
+ use_8bit_adam: false
44
+ use_copy_weight_from_teacher: true
45
+ use_ema: true
46
+ valid_prompt: a golden vase with different flowers
47
+ valid_steps: 500
logs/text2image-fine-tune/1717893880.4456894/events.out.tfevents.1717893880.a8343c8809c1.22493.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b6011f7bcca594668a35d88f35e1e3197d1f44c1a6415e9aecf13905ce52ff
3
+ size 2457
logs/text2image-fine-tune/1717893880.4486566/hparams.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ center_crop: true
8
+ checkpointing_steps: 5000
9
+ checkpoints_total_limit: null
10
+ dataloader_num_workers: 0
11
+ dataset_config_name: null
12
+ enable_xformers_memory_efficient_attention: false
13
+ gradient_accumulation_steps: 4
14
+ gradient_checkpointing: true
15
+ lambda_kd_feat: 1.0
16
+ lambda_kd_output: 1.0
17
+ lambda_sd: 1.0
18
+ learning_rate: 5.0e-05
19
+ local_rank: -1
20
+ logging_dir: logs
21
+ lr_scheduler: constant
22
+ lr_warmup_steps: 0
23
+ max_grad_norm: 1.0
24
+ max_train_samples: null
25
+ max_train_steps: 0
26
+ mixed_precision: fp16
27
+ non_ema_revision: null
28
+ num_train_epochs: 0
29
+ num_valid_images: 2
30
+ output_dir: ./results/v2-base_kd_bk_tiny
31
+ pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-base
32
+ random_flip: true
33
+ report_to: all
34
+ resolution: 512
35
+ resume_from_checkpoint: null
36
+ revision: null
37
+ scale_lr: false
38
+ seed: 1234
39
+ train_batch_size: 32
40
+ train_data_dir: ./data/laion_aes/preprocessed_11k
41
+ unet_config_name: bk_tiny
42
+ unet_config_path: ./src/unet_config_v2-base
43
+ use_8bit_adam: false
44
+ use_copy_weight_from_teacher: true
45
+ use_ema: true
46
+ valid_prompt: a golden vase with different flowers
47
+ valid_steps: 500
logs/text2image-fine-tune/1717895383.2177856/events.out.tfevents.1717895383.a8343c8809c1.28476.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eeedce53c0773941d57e221739c105f5ab5364fba7eca7fd0aee99419e50dd1
3
+ size 2457
logs/text2image-fine-tune/1717895383.2211077/hparams.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ center_crop: true
8
+ checkpointing_steps: 5000
9
+ checkpoints_total_limit: null
10
+ dataloader_num_workers: 0
11
+ dataset_config_name: null
12
+ enable_xformers_memory_efficient_attention: false
13
+ gradient_accumulation_steps: 4
14
+ gradient_checkpointing: true
15
+ lambda_kd_feat: 1.0
16
+ lambda_kd_output: 1.0
17
+ lambda_sd: 1.0
18
+ learning_rate: 5.0e-05
19
+ local_rank: -1
20
+ logging_dir: logs
21
+ lr_scheduler: constant
22
+ lr_warmup_steps: 0
23
+ max_grad_norm: 1.0
24
+ max_train_samples: null
25
+ max_train_steps: 0
26
+ mixed_precision: fp16
27
+ non_ema_revision: null
28
+ num_train_epochs: 0
29
+ num_valid_images: 2
30
+ output_dir: ./results/v2-base_kd_bk_tiny
31
+ pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-base
32
+ random_flip: true
33
+ report_to: all
34
+ resolution: 512
35
+ resume_from_checkpoint: null
36
+ revision: null
37
+ scale_lr: false
38
+ seed: 1234
39
+ train_batch_size: 32
40
+ train_data_dir: ./data/laion_aes/preprocessed_11k
41
+ unet_config_name: bk_tiny
42
+ unet_config_path: ./src/unet_config_v2-base
43
+ use_8bit_adam: false
44
+ use_copy_weight_from_teacher: true
45
+ use_ema: true
46
+ valid_prompt: a golden vase with different flowers
47
+ valid_steps: 500
logs/text2image-fine-tune/events.out.tfevents.1717891826.a8343c8809c1.14061.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0993749572b8d4492840b40a252b58bc6fff959df7b7b5612ce4b26c8fa9b3be
3
+ size 88
logs/text2image-fine-tune/events.out.tfevents.1717891969.a8343c8809c1.14739.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334b181de4118f862b5697938b2a1b7dd558db7d878569dedbee12829fa3e6a1
3
+ size 88
logs/text2image-fine-tune/events.out.tfevents.1717892979.a8343c8809c1.18650.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03f3f85998300f5d293fa468aa5c0bac1b5bb1fc4876f2babf54d4fbd502e2f0
3
+ size 88
logs/text2image-fine-tune/events.out.tfevents.1717893862.a8343c8809c1.22493.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285e14b72f5161fbe9a5220697a36f984882ae3f91cce389f20df1a272e84f4c
3
+ size 88
logs/text2image-fine-tune/events.out.tfevents.1717895332.a8343c8809c1.28476.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a7c252deb57685f5a199756c29586144c15cded161eba117c8796a0d6c12782
3
+ size 88
model_index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "StableDiffusionControlNetPipeline",
3
  "_diffusers_version": "0.28.2",
4
- "_name_or_path": "CompVis/stable-diffusion-v1-4",
5
  "controlnet": [
6
  "diffusers",
7
  "ControlNetModel"
@@ -14,10 +14,10 @@
14
  null,
15
  null
16
  ],
17
- "requires_safety_checker": true,
18
  "safety_checker": [
19
- "stable_diffusion",
20
- "StableDiffusionSafetyChecker"
21
  ],
22
  "scheduler": [
23
  "diffusers",
 
1
  {
2
  "_class_name": "StableDiffusionControlNetPipeline",
3
  "_diffusers_version": "0.28.2",
4
+ "_name_or_path": "stabilityai/stable-diffusion-2-1-base",
5
  "controlnet": [
6
  "diffusers",
7
  "ControlNetModel"
 
14
  null,
15
  null
16
  ],
17
+ "requires_safety_checker": false,
18
  "safety_checker": [
19
+ null,
20
+ null
21
  ],
22
  "scheduler": [
23
  "diffusers",
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "CompVis/stable-diffusion-v1-4",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
@@ -7,16 +7,16 @@
7
  "bos_token_id": 0,
8
  "dropout": 0.0,
9
  "eos_token_id": 2,
10
- "hidden_act": "quick_gelu",
11
- "hidden_size": 768,
12
  "initializer_factor": 1.0,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
  "layer_norm_eps": 1e-05,
16
  "max_position_embeddings": 77,
17
  "model_type": "clip_text_model",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
  "torch_dtype": "float16",
 
1
  {
2
+ "_name_or_path": "stabilityai/stable-diffusion-2-1-base",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
 
7
  "bos_token_id": 0,
8
  "dropout": 0.0,
9
  "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_size": 1024,
12
  "initializer_factor": 1.0,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
  "layer_norm_eps": 1e-05,
16
  "max_position_embeddings": 77,
17
  "model_type": "clip_text_model",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 23,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
  "torch_dtype": "float16",
text_encoder/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77795e2023adcf39bc29a884661950380bd093cf0750a966d473d1718dc9ef4e
3
- size 246144864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681c555376658c81dc273f2d737a2aeb23ddb6d1d8e5b3a7064636d359a22668
3
+ size 680821096
tokenizer/special_tokens_map.json CHANGED
@@ -13,7 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|endoftext|>",
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "!",
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
unet/config.json CHANGED
@@ -5,12 +5,15 @@
5
  "addition_embed_type": null,
6
  "addition_embed_type_num_heads": 64,
7
  "addition_time_embed_dim": null,
8
- "attention_head_dim": 8,
 
 
 
 
9
  "attention_type": "default",
10
  "block_out_channels": [
11
  320,
12
  640,
13
- 1280,
14
  1280
15
  ],
16
  "center_input_sample": false,
@@ -18,13 +21,12 @@
18
  "class_embeddings_concat": false,
19
  "conv_in_kernel": 3,
20
  "conv_out_kernel": 3,
21
- "cross_attention_dim": 768,
22
  "cross_attention_norm": null,
23
  "down_block_types": [
24
  "CrossAttnDownBlock2D",
25
  "CrossAttnDownBlock2D",
26
- "CrossAttnDownBlock2D",
27
- "DownBlock2D"
28
  ],
29
  "downsample_padding": 1,
30
  "dropout": 0.0,
@@ -57,11 +59,10 @@
57
  "timestep_post_act": null,
58
  "transformer_layers_per_block": 1,
59
  "up_block_types": [
60
- "UpBlock2D",
61
  "CrossAttnUpBlock2D",
62
  "CrossAttnUpBlock2D",
63
  "CrossAttnUpBlock2D"
64
  ],
65
  "upcast_attention": false,
66
- "use_linear_projection": false
67
  }
 
5
  "addition_embed_type": null,
6
  "addition_embed_type_num_heads": 64,
7
  "addition_time_embed_dim": null,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20
12
+ ],
13
  "attention_type": "default",
14
  "block_out_channels": [
15
  320,
16
  640,
 
17
  1280
18
  ],
19
  "center_input_sample": false,
 
21
  "class_embeddings_concat": false,
22
  "conv_in_kernel": 3,
23
  "conv_out_kernel": 3,
24
+ "cross_attention_dim": 1024,
25
  "cross_attention_norm": null,
26
  "down_block_types": [
27
  "CrossAttnDownBlock2D",
28
  "CrossAttnDownBlock2D",
29
+ "CrossAttnDownBlock2D"
 
30
  ],
31
  "downsample_padding": 1,
32
  "dropout": 0.0,
 
59
  "timestep_post_act": null,
60
  "transformer_layers_per_block": 1,
61
  "up_block_types": [
 
62
  "CrossAttnUpBlock2D",
63
  "CrossAttnUpBlock2D",
64
  "CrossAttnUpBlock2D"
65
  ],
66
  "upcast_attention": false,
67
+ "use_linear_projection": true
68
  }
unet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9e9d51e8c2e414f5e4ef31164276798dcad2761559e290ed2c80d9900fe7c77
3
- size 1929435608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1381a43284ede537a46fbfb17380eb80c5bbed310fee24620064d22917690bbd
3
+ size 1307346120
vae/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.28.2",
4
- "_name_or_path": "CompVis/stable-diffusion-v1-4",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
@@ -23,7 +23,7 @@
23
  "layers_per_block": 2,
24
  "norm_num_groups": 32,
25
  "out_channels": 3,
26
- "sample_size": 512,
27
  "scaling_factor": 0.18215,
28
  "up_block_types": [
29
  "UpDecoderBlock2D",
 
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.28.2",
4
+ "_name_or_path": "stabilityai/stable-diffusion-2-1-base",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
 
23
  "layers_per_block": 2,
24
  "norm_num_groups": 32,
25
  "out_channels": 3,
26
+ "sample_size": 768,
27
  "scaling_factor": 0.18215,
28
  "up_block_types": [
29
  "UpDecoderBlock2D",
vae/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fbcf0ebe55a0984f5a5e00d8c4521d52359af7229bb4d81890039d2aa16dd7c
3
  size 167335342
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
3
  size 167335342