JAWCF committed on
Commit
ef1b7df
·
1 Parent(s): 37c2f42
logs/text2image-fine-tune/1688268525.9759955/events.out.tfevents.1688268525.instance-2.1597.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb406acc3ddddc664a802c8df4dbf5e90505f5e02d3dfec504163b3b794a24a7
3
+ size 2201
logs/text2image-fine-tune/1688268525.9774733/hparams.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ caption_column: text
8
+ center_crop: true
9
+ checkpointing_steps: 500
10
+ checkpoints_total_limit: null
11
+ dataloader_num_workers: 0
12
+ dataset_config_name: null
13
+ dataset_name: JAWCF/characters
14
+ enable_xformers_memory_efficient_attention: false
15
+ gradient_accumulation_steps: 4
16
+ gradient_checkpointing: false
17
+ hub_model_id: null
18
+ hub_token: null
19
+ image_column: image
20
+ input_perturbation: 0
21
+ learning_rate: 1.0e-05
22
+ local_rank: -1
23
+ logging_dir: logs
24
+ lr_scheduler: constant
25
+ lr_warmup_steps: 0
26
+ max_grad_norm: 1.0
27
+ max_train_samples: null
28
+ max_train_steps: 5000
29
+ mixed_precision: null
30
+ noise_offset: 0
31
+ non_ema_revision: null
32
+ num_train_epochs: 200
33
+ output_dir: sd-pokemon-model
34
+ prediction_type: null
35
+ pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1
36
+ push_to_hub: false
37
+ random_flip: true
38
+ report_to: tensorboard
39
+ resolution: 768
40
+ resume_from_checkpoint: null
41
+ revision: null
42
+ scale_lr: false
43
+ seed: null
44
+ snr_gamma: null
45
+ tracker_project_name: text2image-fine-tune
46
+ train_batch_size: 1
47
+ train_data_dir: null
48
+ use_8bit_adam: false
49
+ use_ema: true
50
+ validation_epochs: 5
logs/text2image-fine-tune/events.out.tfevents.1688268525.instance-2.1597.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d37ac6478a46508c2f30a1beb3d20089d2f0982253e5fbb900f302381c0d7071
3
+ size 244961
model_index.json CHANGED
@@ -5,14 +5,14 @@
5
  "transformers",
6
  "CLIPImageProcessor"
7
  ],
8
- "requires_safety_checker": true,
9
  "safety_checker": [
10
- "stable_diffusion",
11
- "StableDiffusionSafetyChecker"
12
  ],
13
  "scheduler": [
14
  "diffusers",
15
- "PNDMScheduler"
16
  ],
17
  "text_encoder": [
18
  "transformers",
 
5
  "transformers",
6
  "CLIPImageProcessor"
7
  ],
8
+ "requires_safety_checker": false,
9
  "safety_checker": [
10
+ null,
11
+ null
12
  ],
13
  "scheduler": [
14
  "diffusers",
15
+ "DDIMScheduler"
16
  ],
17
  "text_encoder": [
18
  "transformers",
scheduler/scheduler_config.json CHANGED
@@ -1,14 +1,20 @@
1
  {
2
- "_class_name": "PNDMScheduler",
3
  "_diffusers_version": "0.18.0.dev0",
4
  "beta_end": 0.012,
5
  "beta_schedule": "scaled_linear",
6
  "beta_start": 0.00085,
7
  "clip_sample": false,
 
 
8
  "num_train_timesteps": 1000,
9
- "prediction_type": "epsilon",
 
 
10
  "set_alpha_to_one": false,
11
  "skip_prk_steps": true,
12
  "steps_offset": 1,
 
 
13
  "trained_betas": null
14
  }
 
1
  {
2
+ "_class_name": "DDIMScheduler",
3
  "_diffusers_version": "0.18.0.dev0",
4
  "beta_end": 0.012,
5
  "beta_schedule": "scaled_linear",
6
  "beta_start": 0.00085,
7
  "clip_sample": false,
8
+ "clip_sample_range": 1.0,
9
+ "dynamic_thresholding_ratio": 0.995,
10
  "num_train_timesteps": 1000,
11
+ "prediction_type": "v_prediction",
12
+ "rescale_betas_zero_snr": false,
13
+ "sample_max_value": 1.0,
14
  "set_alpha_to_one": false,
15
  "skip_prk_steps": true,
16
  "steps_offset": 1,
17
+ "thresholding": false,
18
+ "timestep_spacing": "leading",
19
  "trained_betas": null
20
  }
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "CompVis/stable-diffusion-v1-4",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
@@ -7,16 +7,16 @@
7
  "bos_token_id": 0,
8
  "dropout": 0.0,
9
  "eos_token_id": 2,
10
- "hidden_act": "quick_gelu",
11
- "hidden_size": 768,
12
  "initializer_factor": 1.0,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
  "layer_norm_eps": 1e-05,
16
  "max_position_embeddings": 77,
17
  "model_type": "clip_text_model",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
  "torch_dtype": "float16",
 
1
  {
2
+ "_name_or_path": "stabilityai/stable-diffusion-2-1",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
 
7
  "bos_token_id": 0,
8
  "dropout": 0.0,
9
  "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_size": 1024,
12
  "initializer_factor": 1.0,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
  "layer_norm_eps": 1e-05,
16
  "max_position_embeddings": 77,
17
  "model_type": "clip_text_model",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 23,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
  "torch_dtype": "float16",
text_encoder/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a34f30098988d85dc0fb0fc272a842ebcf552e2ebc6ce4adbcf3695d08e8a90
3
- size 246188833
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a06cf32cf585d03b55fef302142a5321b761ec440113925f64f4ceaffc7730
3
+ size 680904225
tokenizer/special_tokens_map.json CHANGED
@@ -13,7 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|endoftext|>",
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "!",
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
unet/config.json CHANGED
@@ -1,11 +1,16 @@
1
  {
2
  "_class_name": "UNet2DConditionModel",
3
  "_diffusers_version": "0.18.0.dev0",
4
- "_name_or_path": "CompVis/stable-diffusion-v1-4",
5
  "act_fn": "silu",
6
  "addition_embed_type": null,
7
  "addition_embed_type_num_heads": 64,
8
- "attention_head_dim": 8,
 
 
 
 
 
9
  "block_out_channels": [
10
  320,
11
  640,
@@ -17,7 +22,7 @@
17
  "class_embeddings_concat": false,
18
  "conv_in_kernel": 3,
19
  "conv_out_kernel": 3,
20
- "cross_attention_dim": 768,
21
  "cross_attention_norm": null,
22
  "down_block_types": [
23
  "CrossAttnDownBlock2D",
@@ -45,7 +50,7 @@
45
  "resnet_out_scale_factor": 1.0,
46
  "resnet_skip_time_act": false,
47
  "resnet_time_scale_shift": "default",
48
- "sample_size": 64,
49
  "time_cond_proj_dim": null,
50
  "time_embedding_act_fn": null,
51
  "time_embedding_dim": null,
@@ -57,6 +62,6 @@
57
  "CrossAttnUpBlock2D",
58
  "CrossAttnUpBlock2D"
59
  ],
60
- "upcast_attention": false,
61
- "use_linear_projection": false
62
  }
 
1
  {
2
  "_class_name": "UNet2DConditionModel",
3
  "_diffusers_version": "0.18.0.dev0",
4
+ "_name_or_path": "stabilityai/stable-diffusion-2-1",
5
  "act_fn": "silu",
6
  "addition_embed_type": null,
7
  "addition_embed_type_num_heads": 64,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20,
12
+ 20
13
+ ],
14
  "block_out_channels": [
15
  320,
16
  640,
 
22
  "class_embeddings_concat": false,
23
  "conv_in_kernel": 3,
24
  "conv_out_kernel": 3,
25
+ "cross_attention_dim": 1024,
26
  "cross_attention_norm": null,
27
  "down_block_types": [
28
  "CrossAttnDownBlock2D",
 
50
  "resnet_out_scale_factor": 1.0,
51
  "resnet_skip_time_act": false,
52
  "resnet_time_scale_shift": "default",
53
+ "sample_size": 96,
54
  "time_cond_proj_dim": null,
55
  "time_embedding_act_fn": null,
56
  "time_embedding_dim": null,
 
62
  "CrossAttnUpBlock2D",
63
  "CrossAttnUpBlock2D"
64
  ],
65
+ "upcast_attention": true,
66
+ "use_linear_projection": true
67
  }
unet/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d28540e6d1458dc018ba11277f20dc1ac38b328e356453317fe97c151d469c1
3
- size 3438375973
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712ef4aed1c65cca86898dbf038b9e0f1bf362232a15e7003e1f28fb355b4045
3
+ size 3463934693
vae/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.18.0.dev0",
4
- "_name_or_path": "CompVis/stable-diffusion-v1-4",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
@@ -20,7 +20,7 @@
20
  "layers_per_block": 2,
21
  "norm_num_groups": 32,
22
  "out_channels": 3,
23
- "sample_size": 512,
24
  "scaling_factor": 0.18215,
25
  "up_block_types": [
26
  "UpDecoderBlock2D",
 
1
  {
2
  "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.18.0.dev0",
4
+ "_name_or_path": "stabilityai/stable-diffusion-2-1",
5
  "act_fn": "silu",
6
  "block_out_channels": [
7
  128,
 
20
  "layers_per_block": 2,
21
  "norm_num_groups": 32,
22
  "out_channels": 3,
23
+ "sample_size": 768,
24
  "scaling_factor": 0.18215,
25
  "up_block_types": [
26
  "UpDecoderBlock2D",
vae/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa8c1b74b3e2781e4347b9b350203597674d8860a4338b46431de760c3a5dd22
3
  size 167407857
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cfdd672df17db3283633acb3721afc7735927293c2d3bd2bf64939a6dcd950e
3
  size 167407857