omkar1799 committed on
Commit
bce014a
·
verified ·
1 Parent(s): 082bc83

End of training

Browse files
Files changed (48) hide show
  1. README.md +66 -0
  2. checkpoint-1000/optimizer.bin +3 -0
  3. checkpoint-1000/random_states_0.pkl +3 -0
  4. checkpoint-1000/scaler.pt +3 -0
  5. checkpoint-1000/scheduler.bin +3 -0
  6. checkpoint-1000/unet/config.json +68 -0
  7. checkpoint-1000/unet/diffusion_pytorch_model.safetensors +3 -0
  8. checkpoint-1000/unet_ema/config.json +75 -0
  9. checkpoint-1000/unet_ema/diffusion_pytorch_model.safetensors +3 -0
  10. checkpoint-500/optimizer.bin +3 -0
  11. checkpoint-500/random_states_0.pkl +3 -0
  12. checkpoint-500/scaler.pt +3 -0
  13. checkpoint-500/scheduler.bin +3 -0
  14. checkpoint-500/unet/config.json +68 -0
  15. checkpoint-500/unet/diffusion_pytorch_model.safetensors +3 -0
  16. checkpoint-500/unet_ema/config.json +75 -0
  17. checkpoint-500/unet_ema/diffusion_pytorch_model.safetensors +3 -0
  18. feature_extractor/preprocessor_config.json +27 -0
  19. logs/text2image-fine-tune/1727631677.842755/events.out.tfevents.1727631677.dbc894fe697a.4992.1 +3 -0
  20. logs/text2image-fine-tune/1727631677.844447/hparams.yml +55 -0
  21. logs/text2image-fine-tune/1727632520.8069768/events.out.tfevents.1727632520.dbc894fe697a.8605.1 +3 -0
  22. logs/text2image-fine-tune/1727632520.8087685/hparams.yml +55 -0
  23. logs/text2image-fine-tune/1727634748.9299655/events.out.tfevents.1727634748.dbc894fe697a.17991.1 +3 -0
  24. logs/text2image-fine-tune/1727634748.9316866/hparams.yml +55 -0
  25. logs/text2image-fine-tune/1727635090.4648912/events.out.tfevents.1727635090.dbc894fe697a.19436.1 +3 -0
  26. logs/text2image-fine-tune/1727635090.4667041/hparams.yml +55 -0
  27. logs/text2image-fine-tune/1727637577.9019005/events.out.tfevents.1727637577.dbc894fe697a.29883.1 +3 -0
  28. logs/text2image-fine-tune/1727637577.9037259/hparams.yml +55 -0
  29. logs/text2image-fine-tune/events.out.tfevents.1727631677.dbc894fe697a.4992.0 +3 -0
  30. logs/text2image-fine-tune/events.out.tfevents.1727632520.dbc894fe697a.8605.0 +3 -0
  31. logs/text2image-fine-tune/events.out.tfevents.1727634748.dbc894fe697a.17991.0 +3 -0
  32. logs/text2image-fine-tune/events.out.tfevents.1727635090.dbc894fe697a.19436.0 +3 -0
  33. logs/text2image-fine-tune/events.out.tfevents.1727637577.dbc894fe697a.29883.0 +3 -0
  34. model_index.json +38 -0
  35. safety_checker/config.json +28 -0
  36. safety_checker/model.safetensors +3 -0
  37. scheduler/scheduler_config.json +15 -0
  38. text_encoder/config.json +25 -0
  39. text_encoder/model.safetensors +3 -0
  40. tokenizer/merges.txt +0 -0
  41. tokenizer/special_tokens_map.json +24 -0
  42. tokenizer/tokenizer_config.json +30 -0
  43. tokenizer/vocab.json +0 -0
  44. unet/config.json +68 -0
  45. unet/diffusion_pytorch_model.safetensors +3 -0
  46. vae/config.json +38 -0
  47. vae/diffusion_pytorch_model.safetensors +3 -0
  48. val_imgs_grid.png +0 -0
README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: runwayml/stable-diffusion-v1-5
3
+ library_name: diffusers
4
+ license: creativeml-openrail-m
5
+ tags:
6
+ - stable-diffusion
7
+ - stable-diffusion-diffusers
8
+ - text-to-image
9
+ - diffusers
10
+ - diffusers-training
11
+ inference: true
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the training script had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+
18
+ # Text-to-image finetuning - omkar1799/script-sd-city-scape-prints-model
19
+
20
+ This pipeline was finetuned from **runwayml/stable-diffusion-v1-5** on the **omkar1799/city-scape-prints-dataset** dataset. Below are some example images generated with the finetuned pipeline using the following prompts: ['An embroidered, hand-stitched pillow design featuring Atlanta, showcasing landmarks like Coca Cola, Mercedes Benz Stadium, Civil Rights Museum, SunTrust Park, and Stone Mountain. Includes symbols like a panda, airplanes, KFC, trees, and the ferris wheel. The style is playful, colorful, and folk-art inspired with text labels for each location and decorative elements throughout.', 'An embroidered, hand-stitched pillow design featuring Lisbon, showcasing landmarks like Belém Tower, Jerónimos Monastery, São Jorge Castle, and the 25 de Abril Bridge. Includes symbols like trams, pastel de nata, sardines, tiles, and guitars. The style is playful, colorful, and folk-art inspired with text labels for each location and decorative elements throughout.']:
21
+
22
+ ![val_imgs_grid](./val_imgs_grid.png)
23
+
24
+
25
+ ## Pipeline usage
26
+
27
+ You can use the pipeline like so:
28
+
29
+ ```python
30
+ from diffusers import DiffusionPipeline
31
+ import torch
32
+
33
+ pipeline = DiffusionPipeline.from_pretrained("omkar1799/script-sd-city-scape-prints-model", torch_dtype=torch.float16)
34
+ prompt = "An embroidered, hand-stitched pillow design featuring Atlanta, showcasing landmarks like Coca Cola, Mercedes Benz Stadium, Civil Rights Museum, SunTrust Park, and Stone Mountain. Includes symbols like a panda, airplanes, KFC, trees, and the ferris wheel. The style is playful, colorful, and folk-art inspired with text labels for each location and decorative elements throughout."
35
+ image = pipeline(prompt).images[0]
36
+ image.save("my_image.png")
37
+ ```
38
+
39
+ ## Training info
40
+
41
+ These are the key hyperparameters used during training:
42
+
43
+ * Epochs: 12
44
+ * Learning rate: 5e-06
45
+ * Batch size: 4
46
+ * Gradient accumulation steps: 2
47
+ * Image resolution: 512
48
+ * Mixed-precision: fp16
49
+
50
+
51
+
52
+ ## Intended uses & limitations
53
+
54
+ #### How to use
55
+
56
+ ```python
57
+ # See the "Pipeline usage" section above for a complete, runnable example of this diffusion pipeline.
58
+ ```
59
+
60
+ #### Limitations and bias
61
+
62
+ [TODO: provide examples of latent issues and potential remediations]
63
+
64
+ ## Training details
65
+
66
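+ The model was fine-tuned on the **omkar1799/city-scape-prints-dataset** dataset (image column `image`, caption column `prompt`), with images center-cropped to a resolution of 512. [TODO: describe the contents of the dataset in more detail]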
checkpoint-1000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57ef503ce4814c2914a8cf50316b2c124fcc5522df3572afa00cbcc51cab4da
3
+ size 6876750164
checkpoint-1000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d46b0140f5ccade79104df81df2c429e33d507cff0bb51b191510b83197638ab
3
+ size 14344
checkpoint-1000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d8fdcd0311eba9854fff738038ed4c1a269832665b4d88ba4e4e3d02a1a7e0e
3
+ size 988
checkpoint-1000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a3406d2ef50f9b0ec90e408a1c13d2c8958de7599603cc4c0cc7154da6ebe9
3
+ size 1000
checkpoint-1000/unet/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "attention_type": "default",
11
+ "block_out_channels": [
12
+ 320,
13
+ 640,
14
+ 1280,
15
+ 1280
16
+ ],
17
+ "center_input_sample": false,
18
+ "class_embed_type": null,
19
+ "class_embeddings_concat": false,
20
+ "conv_in_kernel": 3,
21
+ "conv_out_kernel": 3,
22
+ "cross_attention_dim": 768,
23
+ "cross_attention_norm": null,
24
+ "down_block_types": [
25
+ "CrossAttnDownBlock2D",
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "DownBlock2D"
29
+ ],
30
+ "downsample_padding": 1,
31
+ "dropout": 0.0,
32
+ "dual_cross_attention": false,
33
+ "encoder_hid_dim": null,
34
+ "encoder_hid_dim_type": null,
35
+ "flip_sin_to_cos": true,
36
+ "freq_shift": 0,
37
+ "in_channels": 4,
38
+ "layers_per_block": 2,
39
+ "mid_block_only_cross_attention": null,
40
+ "mid_block_scale_factor": 1,
41
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
42
+ "norm_eps": 1e-05,
43
+ "norm_num_groups": 32,
44
+ "num_attention_heads": null,
45
+ "num_class_embeds": null,
46
+ "only_cross_attention": false,
47
+ "out_channels": 4,
48
+ "projection_class_embeddings_input_dim": null,
49
+ "resnet_out_scale_factor": 1.0,
50
+ "resnet_skip_time_act": false,
51
+ "resnet_time_scale_shift": "default",
52
+ "reverse_transformer_layers_per_block": null,
53
+ "sample_size": 64,
54
+ "time_cond_proj_dim": null,
55
+ "time_embedding_act_fn": null,
56
+ "time_embedding_dim": null,
57
+ "time_embedding_type": "positional",
58
+ "timestep_post_act": null,
59
+ "transformer_layers_per_block": 1,
60
+ "up_block_types": [
61
+ "UpBlock2D",
62
+ "CrossAttnUpBlock2D",
63
+ "CrossAttnUpBlock2D",
64
+ "CrossAttnUpBlock2D"
65
+ ],
66
+ "upcast_attention": false,
67
+ "use_linear_projection": false
68
+ }
checkpoint-1000/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:202819d5e7e58ba5ddde6bbf68371c41db1ab916c9e9d6b63a038b35861768b7
3
+ size 3438167536
checkpoint-1000/unet_ema/config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "attention_type": "default",
11
+ "block_out_channels": [
12
+ 320,
13
+ 640,
14
+ 1280,
15
+ 1280
16
+ ],
17
+ "center_input_sample": false,
18
+ "class_embed_type": null,
19
+ "class_embeddings_concat": false,
20
+ "conv_in_kernel": 3,
21
+ "conv_out_kernel": 3,
22
+ "cross_attention_dim": 768,
23
+ "cross_attention_norm": null,
24
+ "decay": 0.9999,
25
+ "down_block_types": [
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "CrossAttnDownBlock2D",
29
+ "DownBlock2D"
30
+ ],
31
+ "downsample_padding": 1,
32
+ "dropout": 0.0,
33
+ "dual_cross_attention": false,
34
+ "encoder_hid_dim": null,
35
+ "encoder_hid_dim_type": null,
36
+ "flip_sin_to_cos": true,
37
+ "freq_shift": 0,
38
+ "in_channels": 4,
39
+ "inv_gamma": 1.0,
40
+ "layers_per_block": 2,
41
+ "mid_block_only_cross_attention": null,
42
+ "mid_block_scale_factor": 1,
43
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
44
+ "min_decay": 0.0,
45
+ "norm_eps": 1e-05,
46
+ "norm_num_groups": 32,
47
+ "num_attention_heads": null,
48
+ "num_class_embeds": null,
49
+ "only_cross_attention": false,
50
+ "optimization_step": 1000,
51
+ "out_channels": 4,
52
+ "power": 0.6666666666666666,
53
+ "projection_class_embeddings_input_dim": null,
54
+ "resnet_out_scale_factor": 1.0,
55
+ "resnet_skip_time_act": false,
56
+ "resnet_time_scale_shift": "default",
57
+ "reverse_transformer_layers_per_block": null,
58
+ "sample_size": 64,
59
+ "time_cond_proj_dim": null,
60
+ "time_embedding_act_fn": null,
61
+ "time_embedding_dim": null,
62
+ "time_embedding_type": "positional",
63
+ "timestep_post_act": null,
64
+ "transformer_layers_per_block": 1,
65
+ "up_block_types": [
66
+ "UpBlock2D",
67
+ "CrossAttnUpBlock2D",
68
+ "CrossAttnUpBlock2D",
69
+ "CrossAttnUpBlock2D"
70
+ ],
71
+ "upcast_attention": false,
72
+ "update_after_step": 0,
73
+ "use_ema_warmup": false,
74
+ "use_linear_projection": false
75
+ }
checkpoint-1000/unet_ema/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:188f0fc704b90c94a011b323bbb27d75e83174c01add5a0fb7804bd90633e9e0
3
+ size 3438167536
checkpoint-500/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb77f1a4f20cf0f25081c842a428f7b60dba9f9bcb24cc41b22c073fa0cec8ab
3
+ size 6876750164
checkpoint-500/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2e15ec87c7da91f9717f57cb97ed6dad060ec7eaa364fec8369917841b0c08c
3
+ size 14344
checkpoint-500/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b984273ea2d45b7ffb1d047bb359d93111e41fcad70d16a1b453fd38f72636
3
+ size 988
checkpoint-500/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65115106fbd9463fa6c7133b543dece71c31fcba363ff15b8c73f8c7eb0739c5
3
+ size 1000
checkpoint-500/unet/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "attention_type": "default",
11
+ "block_out_channels": [
12
+ 320,
13
+ 640,
14
+ 1280,
15
+ 1280
16
+ ],
17
+ "center_input_sample": false,
18
+ "class_embed_type": null,
19
+ "class_embeddings_concat": false,
20
+ "conv_in_kernel": 3,
21
+ "conv_out_kernel": 3,
22
+ "cross_attention_dim": 768,
23
+ "cross_attention_norm": null,
24
+ "down_block_types": [
25
+ "CrossAttnDownBlock2D",
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "DownBlock2D"
29
+ ],
30
+ "downsample_padding": 1,
31
+ "dropout": 0.0,
32
+ "dual_cross_attention": false,
33
+ "encoder_hid_dim": null,
34
+ "encoder_hid_dim_type": null,
35
+ "flip_sin_to_cos": true,
36
+ "freq_shift": 0,
37
+ "in_channels": 4,
38
+ "layers_per_block": 2,
39
+ "mid_block_only_cross_attention": null,
40
+ "mid_block_scale_factor": 1,
41
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
42
+ "norm_eps": 1e-05,
43
+ "norm_num_groups": 32,
44
+ "num_attention_heads": null,
45
+ "num_class_embeds": null,
46
+ "only_cross_attention": false,
47
+ "out_channels": 4,
48
+ "projection_class_embeddings_input_dim": null,
49
+ "resnet_out_scale_factor": 1.0,
50
+ "resnet_skip_time_act": false,
51
+ "resnet_time_scale_shift": "default",
52
+ "reverse_transformer_layers_per_block": null,
53
+ "sample_size": 64,
54
+ "time_cond_proj_dim": null,
55
+ "time_embedding_act_fn": null,
56
+ "time_embedding_dim": null,
57
+ "time_embedding_type": "positional",
58
+ "timestep_post_act": null,
59
+ "transformer_layers_per_block": 1,
60
+ "up_block_types": [
61
+ "UpBlock2D",
62
+ "CrossAttnUpBlock2D",
63
+ "CrossAttnUpBlock2D",
64
+ "CrossAttnUpBlock2D"
65
+ ],
66
+ "upcast_attention": false,
67
+ "use_linear_projection": false
68
+ }
checkpoint-500/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda8a62948dcd72c7ed947d189de7d2cd9dc63dc0d7d434d3a6735913c88f075
3
+ size 3438167536
checkpoint-500/unet_ema/config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "attention_type": "default",
11
+ "block_out_channels": [
12
+ 320,
13
+ 640,
14
+ 1280,
15
+ 1280
16
+ ],
17
+ "center_input_sample": false,
18
+ "class_embed_type": null,
19
+ "class_embeddings_concat": false,
20
+ "conv_in_kernel": 3,
21
+ "conv_out_kernel": 3,
22
+ "cross_attention_dim": 768,
23
+ "cross_attention_norm": null,
24
+ "decay": 0.9999,
25
+ "down_block_types": [
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "CrossAttnDownBlock2D",
29
+ "DownBlock2D"
30
+ ],
31
+ "downsample_padding": 1,
32
+ "dropout": 0.0,
33
+ "dual_cross_attention": false,
34
+ "encoder_hid_dim": null,
35
+ "encoder_hid_dim_type": null,
36
+ "flip_sin_to_cos": true,
37
+ "freq_shift": 0,
38
+ "in_channels": 4,
39
+ "inv_gamma": 1.0,
40
+ "layers_per_block": 2,
41
+ "mid_block_only_cross_attention": null,
42
+ "mid_block_scale_factor": 1,
43
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
44
+ "min_decay": 0.0,
45
+ "norm_eps": 1e-05,
46
+ "norm_num_groups": 32,
47
+ "num_attention_heads": null,
48
+ "num_class_embeds": null,
49
+ "only_cross_attention": false,
50
+ "optimization_step": 500,
51
+ "out_channels": 4,
52
+ "power": 0.6666666666666666,
53
+ "projection_class_embeddings_input_dim": null,
54
+ "resnet_out_scale_factor": 1.0,
55
+ "resnet_skip_time_act": false,
56
+ "resnet_time_scale_shift": "default",
57
+ "reverse_transformer_layers_per_block": null,
58
+ "sample_size": 64,
59
+ "time_cond_proj_dim": null,
60
+ "time_embedding_act_fn": null,
61
+ "time_embedding_dim": null,
62
+ "time_embedding_type": "positional",
63
+ "timestep_post_act": null,
64
+ "transformer_layers_per_block": 1,
65
+ "up_block_types": [
66
+ "UpBlock2D",
67
+ "CrossAttnUpBlock2D",
68
+ "CrossAttnUpBlock2D",
69
+ "CrossAttnUpBlock2D"
70
+ ],
71
+ "upcast_attention": false,
72
+ "update_after_step": 0,
73
+ "use_ema_warmup": false,
74
+ "use_linear_projection": false
75
+ }
checkpoint-500/unet_ema/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5402576521b8f0d064bebe70bd021bba09557936cc4a4d5ef7984492f57b438f
3
+ size 3438167536
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 224
26
+ }
27
+ }
logs/text2image-fine-tune/1727631677.842755/events.out.tfevents.1727631677.dbc894fe697a.4992.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc15dde876597ae031d69c5c550defd76363c6db6cd73dae7db6e1fe3bab10d
3
+ size 2491
logs/text2image-fine-tune/1727631677.844447/hparams.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ caption_column: prompt
8
+ center_crop: true
9
+ checkpointing_steps: 500
10
+ checkpoints_total_limit: null
11
+ dataloader_num_workers: 0
12
+ dataset_config_name: null
13
+ dataset_name: omkar1799/city-scape-prints-dataset
14
+ dream_detail_preservation: 1.0
15
+ dream_training: false
16
+ enable_xformers_memory_efficient_attention: true
17
+ foreach_ema: false
18
+ gradient_accumulation_steps: 4
19
+ gradient_checkpointing: true
20
+ hub_model_id: null
21
+ hub_token: null
22
+ image_column: image
23
+ input_perturbation: 0
24
+ learning_rate: 1.0e-05
25
+ local_rank: -1
26
+ logging_dir: logs
27
+ lr_scheduler: constant
28
+ lr_warmup_steps: 0
29
+ max_grad_norm: 1.0
30
+ max_train_samples: null
31
+ max_train_steps: 15000
32
+ mixed_precision: fp16
33
+ noise_offset: 0
34
+ non_ema_revision: null
35
+ num_train_epochs: 834
36
+ offload_ema: false
37
+ output_dir: script-sd-city-scape-prints-model
38
+ prediction_type: null
39
+ pretrained_model_name_or_path: runwayml/stable-diffusion-v1-5
40
+ push_to_hub: true
41
+ random_flip: false
42
+ report_to: tensorboard
43
+ resolution: 512
44
+ resume_from_checkpoint: null
45
+ revision: null
46
+ scale_lr: false
47
+ seed: null
48
+ snr_gamma: null
49
+ tracker_project_name: text2image-fine-tune
50
+ train_batch_size: 1
51
+ train_data_dir: null
52
+ use_8bit_adam: false
53
+ use_ema: true
54
+ validation_epochs: 5
55
+ variant: null
logs/text2image-fine-tune/1727632520.8069768/events.out.tfevents.1727632520.dbc894fe697a.8605.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bd52aec825f06bf7504c12aa8e1944e373dfcd9524c308eec9a0d343a9bd0cd
3
+ size 2491
logs/text2image-fine-tune/1727632520.8087685/hparams.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ caption_column: prompt
8
+ center_crop: true
9
+ checkpointing_steps: 500
10
+ checkpoints_total_limit: null
11
+ dataloader_num_workers: 0
12
+ dataset_config_name: null
13
+ dataset_name: omkar1799/city-scape-prints-dataset
14
+ dream_detail_preservation: 1.0
15
+ dream_training: false
16
+ enable_xformers_memory_efficient_attention: true
17
+ foreach_ema: false
18
+ gradient_accumulation_steps: 2
19
+ gradient_checkpointing: true
20
+ hub_model_id: null
21
+ hub_token: null
22
+ image_column: image
23
+ input_perturbation: 0
24
+ learning_rate: 5.0e-06
25
+ local_rank: -1
26
+ logging_dir: logs
27
+ lr_scheduler: constant
28
+ lr_warmup_steps: 0
29
+ max_grad_norm: 1.0
30
+ max_train_samples: null
31
+ max_train_steps: 1000
32
+ mixed_precision: fp16
33
+ noise_offset: 0
34
+ non_ema_revision: null
35
+ num_train_epochs: 112
36
+ offload_ema: false
37
+ output_dir: script-sd-city-scape-prints-model
38
+ prediction_type: null
39
+ pretrained_model_name_or_path: runwayml/stable-diffusion-v1-5
40
+ push_to_hub: true
41
+ random_flip: false
42
+ report_to: tensorboard
43
+ resolution: 512
44
+ resume_from_checkpoint: null
45
+ revision: null
46
+ scale_lr: false
47
+ seed: null
48
+ snr_gamma: null
49
+ tracker_project_name: text2image-fine-tune
50
+ train_batch_size: 4
51
+ train_data_dir: null
52
+ use_8bit_adam: false
53
+ use_ema: true
54
+ validation_epochs: 5
55
+ variant: null
logs/text2image-fine-tune/1727634748.9299655/events.out.tfevents.1727634748.dbc894fe697a.17991.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4746748c59d55aa972c66dd6f2fec560b9fdab1574acd563263bc07d35990fc
3
+ size 2491
logs/text2image-fine-tune/1727634748.9316866/hparams.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ caption_column: prompt
8
+ center_crop: true
9
+ checkpointing_steps: 500
10
+ checkpoints_total_limit: null
11
+ dataloader_num_workers: 0
12
+ dataset_config_name: null
13
+ dataset_name: omkar1799/city-scape-prints-dataset
14
+ dream_detail_preservation: 1.0
15
+ dream_training: false
16
+ enable_xformers_memory_efficient_attention: true
17
+ foreach_ema: false
18
+ gradient_accumulation_steps: 2
19
+ gradient_checkpointing: true
20
+ hub_model_id: null
21
+ hub_token: null
22
+ image_column: image
23
+ input_perturbation: 0
24
+ learning_rate: 5.0e-06
25
+ local_rank: -1
26
+ logging_dir: logs
27
+ lr_scheduler: constant
28
+ lr_warmup_steps: 0
29
+ max_grad_norm: 1.0
30
+ max_train_samples: null
31
+ max_train_steps: 1000
32
+ mixed_precision: fp16
33
+ noise_offset: 0
34
+ non_ema_revision: null
35
+ num_train_epochs: 112
36
+ offload_ema: false
37
+ output_dir: script-sd-city-scape-prints-model
38
+ prediction_type: null
39
+ pretrained_model_name_or_path: runwayml/stable-diffusion-v1-5
40
+ push_to_hub: true
41
+ random_flip: false
42
+ report_to: tensorboard
43
+ resolution: 512
44
+ resume_from_checkpoint: null
45
+ revision: null
46
+ scale_lr: false
47
+ seed: null
48
+ snr_gamma: null
49
+ tracker_project_name: text2image-fine-tune
50
+ train_batch_size: 4
51
+ train_data_dir: null
52
+ use_8bit_adam: false
53
+ use_ema: true
54
+ validation_epochs: 5
55
+ variant: null
logs/text2image-fine-tune/1727635090.4648912/events.out.tfevents.1727635090.dbc894fe697a.19436.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21270a38bec2b5523c2ff48cf074ebfb697759946d6958783aa969a5740ff238
3
+ size 2491
logs/text2image-fine-tune/1727635090.4667041/hparams.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ caption_column: prompt
8
+ center_crop: true
9
+ checkpointing_steps: 500
10
+ checkpoints_total_limit: null
11
+ dataloader_num_workers: 0
12
+ dataset_config_name: null
13
+ dataset_name: omkar1799/city-scape-prints-dataset
14
+ dream_detail_preservation: 1.0
15
+ dream_training: false
16
+ enable_xformers_memory_efficient_attention: true
17
+ foreach_ema: false
18
+ gradient_accumulation_steps: 2
19
+ gradient_checkpointing: true
20
+ hub_model_id: null
21
+ hub_token: null
22
+ image_column: image
23
+ input_perturbation: 0
24
+ learning_rate: 5.0e-06
25
+ local_rank: -1
26
+ logging_dir: logs
27
+ lr_scheduler: constant
28
+ lr_warmup_steps: 0
29
+ max_grad_norm: 1.0
30
+ max_train_samples: null
31
+ max_train_steps: 1000
32
+ mixed_precision: fp16
33
+ noise_offset: 0
34
+ non_ema_revision: null
35
+ num_train_epochs: 112
36
+ offload_ema: false
37
+ output_dir: script-sd-city-scape-prints-model
38
+ prediction_type: null
39
+ pretrained_model_name_or_path: runwayml/stable-diffusion-v1-5
40
+ push_to_hub: true
41
+ random_flip: false
42
+ report_to: tensorboard
43
+ resolution: 512
44
+ resume_from_checkpoint: null
45
+ revision: null
46
+ scale_lr: false
47
+ seed: null
48
+ snr_gamma: null
49
+ tracker_project_name: text2image-fine-tune
50
+ train_batch_size: 4
51
+ train_data_dir: null
52
+ use_8bit_adam: false
53
+ use_ema: true
54
+ validation_epochs: 5
55
+ variant: null
logs/text2image-fine-tune/1727637577.9019005/events.out.tfevents.1727637577.dbc894fe697a.29883.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1070dfb034c598cb5c15d167ec3da1a0e3ccdcd35e050dcc245044ea10e05829
3
+ size 2491
logs/text2image-fine-tune/1727637577.9037259/hparams.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ caption_column: prompt
8
+ center_crop: true
9
+ checkpointing_steps: 500
10
+ checkpoints_total_limit: null
11
+ dataloader_num_workers: 0
12
+ dataset_config_name: null
13
+ dataset_name: omkar1799/city-scape-prints-dataset
14
+ dream_detail_preservation: 1.0
15
+ dream_training: false
16
+ enable_xformers_memory_efficient_attention: true
17
+ foreach_ema: false
18
+ gradient_accumulation_steps: 2
19
+ gradient_checkpointing: true
20
+ hub_model_id: null
21
+ hub_token: null
22
+ image_column: image
23
+ input_perturbation: 0
24
+ learning_rate: 5.0e-06
25
+ local_rank: -1
26
+ logging_dir: logs
27
+ lr_scheduler: constant
28
+ lr_warmup_steps: 0
29
+ max_grad_norm: 1.0
30
+ max_train_samples: null
31
+ max_train_steps: 100
32
+ mixed_precision: fp16
33
+ noise_offset: 0
34
+ non_ema_revision: null
35
+ num_train_epochs: 12
36
+ offload_ema: false
37
+ output_dir: script-sd-city-scape-prints-model
38
+ prediction_type: null
39
+ pretrained_model_name_or_path: runwayml/stable-diffusion-v1-5
40
+ push_to_hub: true
41
+ random_flip: false
42
+ report_to: tensorboard
43
+ resolution: 512
44
+ resume_from_checkpoint: null
45
+ revision: null
46
+ scale_lr: false
47
+ seed: null
48
+ snr_gamma: null
49
+ tracker_project_name: text2image-fine-tune
50
+ train_batch_size: 4
51
+ train_data_dir: null
52
+ use_8bit_adam: false
53
+ use_ema: true
54
+ validation_epochs: 5
55
+ variant: null
logs/text2image-fine-tune/events.out.tfevents.1727631677.dbc894fe697a.4992.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d88f2d806dbcf3ccb13e7741346e089f542ac12e9889e8d605c9c5a7559cf574
3
+ size 17993
logs/text2image-fine-tune/events.out.tfevents.1727632520.dbc894fe697a.8605.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643bb57fdff8671f6d5a42d8acb167a23f0440499223051e444ca08c1973e9d1
3
+ size 48961
logs/text2image-fine-tune/events.out.tfevents.1727634748.dbc894fe697a.17991.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:134604ac80773d6c7876af599175570978f34dabd43ed5e70600b6d46b41a81a
3
+ size 1006948
logs/text2image-fine-tune/events.out.tfevents.1727635090.dbc894fe697a.19436.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3dcd33558cfbb3a28a2307f8cab5302f1e043a2fc34b421250f9132c6d6c22
3
+ size 21916800
logs/text2image-fine-tune/events.out.tfevents.1727637577.dbc894fe697a.29883.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e6f568d2b825caf82d0c31e7bf1dbad8c1b3810f6532315fbfe7d702d124b1
3
+ size 2976650
model_index.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionPipeline",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "feature_extractor": [
6
+ "transformers",
7
+ "CLIPImageProcessor"
8
+ ],
9
+ "image_encoder": [
10
+ null,
11
+ null
12
+ ],
13
+ "requires_safety_checker": true,
14
+ "safety_checker": [
15
+ "stable_diffusion",
16
+ "StableDiffusionSafetyChecker"
17
+ ],
18
+ "scheduler": [
19
+ "diffusers",
20
+ "PNDMScheduler"
21
+ ],
22
+ "text_encoder": [
23
+ "transformers",
24
+ "CLIPTextModel"
25
+ ],
26
+ "tokenizer": [
27
+ "transformers",
28
+ "CLIPTokenizer"
29
+ ],
30
+ "unet": [
31
+ "diffusers",
32
+ "UNet2DConditionModel"
33
+ ],
34
+ "vae": [
35
+ "diffusers",
36
+ "AutoencoderKL"
37
+ ]
38
+ }
safety_checker/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/root/.cache/huggingface/hub/models--runwayml--stable-diffusion-v1-5/snapshots/f03de327dd89b501a01da37fc5240cf4fdba85a1/safety_checker",
3
+ "architectures": [
4
+ "StableDiffusionSafetyChecker"
5
+ ],
6
+ "initializer_factor": 1.0,
7
+ "logit_scale_init_value": 2.6592,
8
+ "model_type": "clip",
9
+ "projection_dim": 768,
10
+ "text_config": {
11
+ "dropout": 0.0,
12
+ "hidden_size": 768,
13
+ "intermediate_size": 3072,
14
+ "model_type": "clip_text_model",
15
+ "num_attention_heads": 12
16
+ },
17
+ "torch_dtype": "float32",
18
+ "transformers_version": "4.44.2",
19
+ "vision_config": {
20
+ "dropout": 0.0,
21
+ "hidden_size": 1024,
22
+ "intermediate_size": 4096,
23
+ "model_type": "clip_vision_model",
24
+ "num_attention_heads": 16,
25
+ "num_hidden_layers": 24,
26
+ "patch_size": 14
27
+ }
28
+ }
safety_checker/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb351a5ded815c3ff744968ad9c6b218d071b9d313d04f35e813b84b4c0ffde8
3
+ size 1215979664
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "PNDMScheduler",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "num_train_timesteps": 1000,
9
+ "prediction_type": "epsilon",
10
+ "set_alpha_to_one": false,
11
+ "skip_prk_steps": true,
12
+ "steps_offset": 1,
13
+ "timestep_spacing": "leading",
14
+ "trained_betas": null
15
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "quick_gelu",
11
+ "hidden_size": 768,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 768,
22
+ "torch_dtype": "float16",
23
+ "transformers_version": "4.44.2",
24
+ "vocab_size": 49408
25
+ }
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
3
+ size 246144152
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "49406": {
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49407": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
+ },
21
+ "bos_token": "<|startoftext|>",
22
+ "clean_up_tokenization_spaces": true,
23
+ "do_lower_case": true,
24
+ "eos_token": "<|endoftext|>",
25
+ "errors": "replace",
26
+ "model_max_length": 77,
27
+ "pad_token": "<|endoftext|>",
28
+ "tokenizer_class": "CLIPTokenizer",
29
+ "unk_token": "<|endoftext|>"
30
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "attention_type": "default",
11
+ "block_out_channels": [
12
+ 320,
13
+ 640,
14
+ 1280,
15
+ 1280
16
+ ],
17
+ "center_input_sample": false,
18
+ "class_embed_type": null,
19
+ "class_embeddings_concat": false,
20
+ "conv_in_kernel": 3,
21
+ "conv_out_kernel": 3,
22
+ "cross_attention_dim": 768,
23
+ "cross_attention_norm": null,
24
+ "down_block_types": [
25
+ "CrossAttnDownBlock2D",
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "DownBlock2D"
29
+ ],
30
+ "downsample_padding": 1,
31
+ "dropout": 0.0,
32
+ "dual_cross_attention": false,
33
+ "encoder_hid_dim": null,
34
+ "encoder_hid_dim_type": null,
35
+ "flip_sin_to_cos": true,
36
+ "freq_shift": 0,
37
+ "in_channels": 4,
38
+ "layers_per_block": 2,
39
+ "mid_block_only_cross_attention": null,
40
+ "mid_block_scale_factor": 1,
41
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
42
+ "norm_eps": 1e-05,
43
+ "norm_num_groups": 32,
44
+ "num_attention_heads": null,
45
+ "num_class_embeds": null,
46
+ "only_cross_attention": false,
47
+ "out_channels": 4,
48
+ "projection_class_embeddings_input_dim": null,
49
+ "resnet_out_scale_factor": 1.0,
50
+ "resnet_skip_time_act": false,
51
+ "resnet_time_scale_shift": "default",
52
+ "reverse_transformer_layers_per_block": null,
53
+ "sample_size": 64,
54
+ "time_cond_proj_dim": null,
55
+ "time_embedding_act_fn": null,
56
+ "time_embedding_dim": null,
57
+ "time_embedding_type": "positional",
58
+ "timestep_post_act": null,
59
+ "transformer_layers_per_block": 1,
60
+ "up_block_types": [
61
+ "UpBlock2D",
62
+ "CrossAttnUpBlock2D",
63
+ "CrossAttnUpBlock2D",
64
+ "CrossAttnUpBlock2D"
65
+ ],
66
+ "upcast_attention": false,
67
+ "use_linear_projection": false
68
+ }
unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46715ee2929656e1bf956a5241cedffe70513bb76df7d6d6a4d353beb317cdfb
3
+ size 3438167536
vae/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "force_upcast": true,
19
+ "in_channels": 3,
20
+ "latent_channels": 4,
21
+ "latents_mean": null,
22
+ "latents_std": null,
23
+ "layers_per_block": 2,
24
+ "mid_block_add_attention": true,
25
+ "norm_num_groups": 32,
26
+ "out_channels": 3,
27
+ "sample_size": 512,
28
+ "scaling_factor": 0.18215,
29
+ "shift_factor": null,
30
+ "up_block_types": [
31
+ "UpDecoderBlock2D",
32
+ "UpDecoderBlock2D",
33
+ "UpDecoderBlock2D",
34
+ "UpDecoderBlock2D"
35
+ ],
36
+ "use_post_quant_conv": true,
37
+ "use_quant_conv": true
38
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fbcf0ebe55a0984f5a5e00d8c4521d52359af7229bb4d81890039d2aa16dd7c
3
+ size 167335342
val_imgs_grid.png ADDED