pxovela committed on
Commit
9264448
·
1 Parent(s): ad73752

training parameters

Browse files
Training Parameters/events.out.tfevents.1684502811.irakli-pc.22776.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f36d9dccfa3ec97d33f6d5fbee74c807f042b57c094a5c88fb5d70fbc463e9
3
+ size 69271642
Training Parameters/optimizer.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "doc": {
3
+ "optimizer": "adamw, adamw8bit, lion",
4
+ "optimizer_desc": "'adamw' in standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
5
+ "lr": "learning rate, if null will use CLI or main JSON config value",
6
+ "betas": "exponential decay rates for the moment estimates",
7
+ "epsilon": "value added to denominator for numerical stability, unused for lion",
8
+ "weight_decay": "weight decay (L2 penalty)",
9
+ "text_encoder_lr_scale": "scale the text encoder LR relative to the Unet LR. for example, if `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`."
10
+ },
11
+ "optimizer": "adamw8bit",
12
+ "lr": 1e-6,
13
+ "betas": [0.9, 0.999],
14
+ "epsilon": 1e-8,
15
+ "weight_decay": 0.010,
16
+ "text_encoder_lr_scale": 1.0
17
+ }
Training Parameters/train.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "amp": false,
3
+ "batch_size": 4,
4
+ "ckpt_every_n_minutes": null,
5
+ "clip_grad_norm": null,
6
+ "clip_skip": 0,
7
+ "cond_dropout": 0.05,
8
+ "data_root": "D:\\ED2\\EveryDream2trainer\\input\\vokda_v3_training",
9
+ "disable_textenc_training": false,
10
+ "disable_xformers": false,
11
+ "flip_p": 0.0,
12
+ "gpuid": 0,
13
+ "gradient_checkpointing": true,
14
+ "grad_accum": 1,
15
+ "logdir": "logs",
16
+ "log_step": 1,
17
+ "lowvram": false,
18
+ "lr": 1.5e-07,
19
+ "lr_decay_steps": 0,
20
+ "lr_scheduler": "constant",
21
+ "lr_warmup_steps": 0,
22
+ "max_epochs": 100,
23
+ "notebook": false,
24
+ "optimizer_config": "optimizer.json",
25
+ "project_name": "vodka_v3",
26
+ "resolution": 512,
27
+ "resume_ckpt": "sd_v1-5_vae",
28
+ "run_name": null,
29
+ "sample_prompts": "sample_prompts.txt",
30
+ "sample_steps": 5000000,
31
+ "save_ckpt_dir": null,
32
+ "save_ckpts_from_n_epochs": 0,
33
+ "save_every_n_epochs": 25,
34
+ "save_optimizer": false,
35
+ "scale_lr": false,
36
+ "seed": 555,
37
+ "shuffle_tags": false,
38
+ "validation_config": "validation_default.json",
39
+ "wandb": false,
40
+ "write_schedule": false,
41
+ "rated_dataset": false,
42
+ "rated_dataset_target_dropout_percent": 50,
43
+ "zero_frequency_noise_ratio": 0.00,
44
+ "save_full_precision": false,
45
+ "disable_unet_training": false,
46
+ "rated_dataset_target_dropout_rate": 50,
47
+ "disable_amp": false,
48
+ "useadam8bit": false
49
+ }
Training Parameters/validation_default.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "documentation": {
3
+ "validate_training": "If true, validate the training using a separate set of image/caption pairs, and log the results as `loss/val`. The curve will trend downwards as the model trains, then flatten and start to trend upwards as effective training finishes and the model begins to overfit the training data. Very useful for preventing overfitting, for checking if your learning rate is too low or too high, and for deciding when to stop training.",
4
+ "val_split_mode": "Either 'automatic' or 'manual', ignored if validate_training is false. 'automatic' val_split_mode picks a random subset of the training set (the number of items is controlled by val_split_proportion) and removes them from training to use as a validation set. 'manual' val_split_mode lets you provide your own folder of validation items (images+captions), specified using 'val_data_root'.",
5
+ "val_split_proportion": "For 'automatic' val_split_mode, the proportion of the train dataset that should be removed to use for validation. Typical values are 0.15-0.2 (15-20% of the total dataset). Higher is more accurate but slower.",
6
+ "val_data_root": "For 'manual' val_split_mode, the path to a folder containing validation items.",
7
+ "stabilize_training_loss": "If true, stabilize the train loss curves for `loss/epoch` and `loss/log step` by re-calculating training loss with a fixed random seed, and log the results as `loss/train-stabilized`. This more clearly shows the training progress, but it is not enough alone to tell you if you're overfitting.",
8
+ "stabilize_split_proportion": "For stabilize_training_loss, the proportion of the train dataset to overlap for stabilizing the train loss graph. Typical values are 0.15-0.2 (15-20% of the total dataset). Higher is more accurate but slower.",
9
+ "every_n_epochs": "How often to run validation (1=every epoch).",
10
+ "seed": "The seed to use when running validation and stabilization passes."
11
+ },
12
+ "validate_training": true,
13
+ "val_split_mode": "automatic",
14
+ "val_data_root": null,
15
+ "val_split_proportion": 0.15,
16
+ "stabilize_training_loss": true,
17
+ "stabilize_split_proportion": 0.15,
18
+ "every_n_epochs": 1,
19
+ "seed": 555
20
+ }
Training Parameters/vodka_v3-20230519-212641.log ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 05/19/2023 09:26:41 PM Seed: 555
2
+ 05/19/2023 09:26:41 PM unet attention_head_dim: 8
3
+ 05/19/2023 09:26:41 PM Inferred yaml: v1-inference.yaml, attn: sd1, prediction_type: epsilon
4
+ 05/19/2023 09:26:50 PM Enabled xformers
5
+ 05/19/2023 09:26:51 PM Overriding LR from optimizer config with main config/cli LR setting: 1.5e-07
6
+ 05/19/2023 09:26:51 PM * Loaded optimizer args from optimizer.json *
7
+ 05/19/2023 09:26:51 PM  * Training Text and Unet *
8
+ 05/19/2023 09:26:51 PM  * Optimizer: AdamW8bit *
9
+ 05/19/2023 09:26:51 PM  unet lr: 1.5e-07, text encoder lr: 1.5e-07, betas: [0.9, 0.999], epsilon: 1e-08, weight_decay: 0.01 *
10
+ 05/19/2023 09:26:51 PM * DLMA resolution 512, buckets: [[512, 512], [576, 448], [448, 576], [640, 384], [384, 640], [768, 320], [320, 768], [896, 256], [256, 896], [1024, 256], [256, 1024]]
11
+ 05/19/2023 09:26:51 PM Preloading images...
12
+ 05/19/2023 09:27:50 PM * Removed 1629 images from the training set to use for validation
13
+ 05/19/2023 09:27:50 PM * DLMA initialized with 1629 images.
14
+ 05/19/2023 09:27:50 PM ** Dataset 'val': 410 batches, num_images: 1640, batch_size: 4
15
+ 05/19/2023 09:27:50 PM * DLMA initialized with 1385 images.
16
+ 05/19/2023 09:27:50 PM ** Dataset 'stabilize-train': 348 batches, num_images: 1392, batch_size: 4
17
+ 05/19/2023 09:27:50 PM * DLMA initialized with 9227 images.
18
+ 05/19/2023 09:27:51 PM ** Dataset 'train': 2311 batches, num_images: 9244, batch_size: 4
19
+ 05/19/2023 09:27:51 PM Pretraining GPU Memory: 5971 / 24576 MB
20
+ 05/19/2023 09:27:51 PM saving ckpts every 1000000000.0 minutes
21
+ 05/19/2023 09:27:51 PM saving ckpts every 25 epochs
22
+ 05/19/2023 09:27:51 PM unet device: cuda:0, precision: torch.float32, training: True
23
+ 05/19/2023 09:27:51 PM text_encoder device: cuda:0, precision: torch.float32, training: True
24
+ 05/19/2023 09:27:51 PM vae device: cuda:0, precision: torch.float16, training: False
25
+ 05/19/2023 09:27:51 PM scheduler: <class 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler'>
26
+ 05/19/2023 09:27:51 PM Project name: vodka_v3
27
+ 05/19/2023 09:27:51 PM grad_accum: 1
28
+ 05/19/2023 09:27:51 PM batch_size: 4
29
+ 05/19/2023 09:27:51 PM epoch_len: 2311
30
+ 05/19/2023 09:27:51 PM Grad scaler enabled: True (amp mode)
31
+ 05/20/2023 10:37:37 AM Saving model, 25 epochs at step 57775
32
+ 05/20/2023 10:37:37 AM * Saving diffusers model to logs\vodka_v3_20230519-212641/ckpts/vodka_v3-ep25-gs57775
33
+ 05/20/2023 10:37:50 AM * Saving SD model to .\vodka_v3-ep25-gs57775.ckpt
34
+ 05/20/2023 11:50:02 PM Saving model, 25 epochs at step 115550
35
+ 05/20/2023 11:50:02 PM * Saving diffusers model to logs\vodka_v3_20230519-212641/ckpts/vodka_v3-ep50-gs115550
36
+ 05/20/2023 11:50:20 PM * Saving SD model to .\vodka_v3-ep50-gs115550.ckpt
37
+ 05/21/2023 09:22:18 AM Validation loss shows diverging
38
+ 05/21/2023 09:53:39 AM Validation loss shows diverging
39
+ 05/21/2023 01:03:47 PM Saving model, 25 epochs at step 173325
40
+ 05/21/2023 01:03:47 PM * Saving diffusers model to logs\vodka_v3_20230519-212641/ckpts/vodka_v3-ep75-gs173325
41
+ 05/21/2023 01:03:59 PM * Saving SD model to .\vodka_v3-ep75-gs173325.ckpt
42
+ 05/21/2023 07:23:34 PM Validation loss shows diverging
43
+ 05/21/2023 08:26:50 PM Validation loss shows diverging
44
+ 05/22/2023 01:14:39 AM Validation loss shows diverging
45
+ 05/22/2023 01:46:05 AM Validation loss shows diverging
46
+ 05/22/2023 02:17:23 AM Validation loss shows diverging
47
+ 05/22/2023 02:17:23 AM * Saving diffusers model to logs\vodka_v3_20230519-212641/ckpts/last-vodka_v3-ep99-gs231100
48
+ 05/22/2023 02:17:32 AM * Saving SD model to .\last-vodka_v3-ep99-gs231100.ckpt
49
+ 05/22/2023 02:17:43 AM Training complete
50
+ 05/22/2023 02:17:43 AM Total training time took 3169.86 minutes, total steps: 231100
51
+ 05/22/2023 02:17:43 AM Average epoch time: 28.11 minutes
52
+ 05/22/2023 02:17:43 AM  ***************************
53
+ 05/22/2023 02:17:43 AM  **** Finished training ****
54
+ 05/22/2023 02:17:43 AM  ***************************