|
{ |
|
"best_metric": 0.5740740740740741, |
|
"best_model_checkpoint": "./results/Vit-CBIS/checkpoint-330", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 495, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06060606060606061, |
|
"grad_norm": 0.9134721755981445, |
|
"learning_rate": 2.9393939393939394e-05, |
|
"loss": 0.6735, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12121212121212122, |
|
"grad_norm": 0.41935786604881287, |
|
"learning_rate": 2.8787878787878788e-05, |
|
"loss": 0.7149, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 2.244313955307007, |
|
"learning_rate": 2.8181818181818185e-05, |
|
"loss": 0.6885, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24242424242424243, |
|
"grad_norm": 0.9235630035400391, |
|
"learning_rate": 2.7575757575757578e-05, |
|
"loss": 0.6889, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30303030303030304, |
|
"grad_norm": 0.14859230816364288, |
|
"learning_rate": 2.696969696969697e-05, |
|
"loss": 0.697, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 1.2009950876235962, |
|
"learning_rate": 2.6363636363636365e-05, |
|
"loss": 0.6882, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42424242424242425, |
|
"grad_norm": 0.5788372159004211, |
|
"learning_rate": 2.575757575757576e-05, |
|
"loss": 0.6862, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48484848484848486, |
|
"grad_norm": 0.324853777885437, |
|
"learning_rate": 2.5151515151515152e-05, |
|
"loss": 0.6898, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 0.8690597414970398, |
|
"learning_rate": 2.454545454545455e-05, |
|
"loss": 0.6813, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 0.9127840995788574, |
|
"learning_rate": 2.3939393939393942e-05, |
|
"loss": 0.7099, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.8866621851921082, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.6999, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 0.705461323261261, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 0.6943, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7878787878787878, |
|
"grad_norm": 1.2404519319534302, |
|
"learning_rate": 2.212121212121212e-05, |
|
"loss": 0.6866, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8484848484848485, |
|
"grad_norm": 0.7197526693344116, |
|
"learning_rate": 2.1515151515151513e-05, |
|
"loss": 0.6921, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.5975974798202515, |
|
"learning_rate": 2.090909090909091e-05, |
|
"loss": 0.6876, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9696969696969697, |
|
"grad_norm": 0.814386248588562, |
|
"learning_rate": 2.0303030303030303e-05, |
|
"loss": 0.7022, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4470899470899471, |
|
"eval_loss": 0.6970731616020203, |
|
"eval_runtime": 35.3817, |
|
"eval_samples_per_second": 10.683, |
|
"eval_steps_per_second": 1.357, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.0303030303030303, |
|
"grad_norm": 0.6936383843421936, |
|
"learning_rate": 1.9696969696969697e-05, |
|
"loss": 0.6918, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 0.5651612281799316, |
|
"learning_rate": 1.909090909090909e-05, |
|
"loss": 0.6903, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1515151515151516, |
|
"grad_norm": 0.1796492338180542, |
|
"learning_rate": 1.8484848484848484e-05, |
|
"loss": 0.7095, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 0.6916122436523438, |
|
"learning_rate": 1.7878787878787877e-05, |
|
"loss": 0.6983, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 1.9430723190307617, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 0.6975, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.5348889827728271, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.6886, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.393939393939394, |
|
"grad_norm": 0.644706130027771, |
|
"learning_rate": 1.606060606060606e-05, |
|
"loss": 0.6841, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 2.1170523166656494, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 0.7109, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 0.6115465760231018, |
|
"learning_rate": 1.484848484848485e-05, |
|
"loss": 0.6882, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5757575757575757, |
|
"grad_norm": 0.8241686820983887, |
|
"learning_rate": 1.4242424242424243e-05, |
|
"loss": 0.6981, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 1.836000680923462, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.6897, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.696969696969697, |
|
"grad_norm": 0.6261163949966431, |
|
"learning_rate": 1.3030303030303032e-05, |
|
"loss": 0.6932, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7575757575757576, |
|
"grad_norm": 0.7731136679649353, |
|
"learning_rate": 1.2424242424242425e-05, |
|
"loss": 0.6859, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 0.28496983647346497, |
|
"learning_rate": 1.1818181818181819e-05, |
|
"loss": 0.6845, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.878787878787879, |
|
"grad_norm": 0.30313462018966675, |
|
"learning_rate": 1.1212121212121212e-05, |
|
"loss": 0.6861, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.9393939393939394, |
|
"grad_norm": 0.7996814846992493, |
|
"learning_rate": 1.0606060606060606e-05, |
|
"loss": 0.6988, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.020075798034668, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.6895, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5740740740740741, |
|
"eval_loss": 0.6877079606056213, |
|
"eval_runtime": 35.5495, |
|
"eval_samples_per_second": 10.633, |
|
"eval_steps_per_second": 1.35, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.0606060606060606, |
|
"grad_norm": 0.3215593695640564, |
|
"learning_rate": 9.393939393939394e-06, |
|
"loss": 0.7026, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.121212121212121, |
|
"grad_norm": 0.6477654576301575, |
|
"learning_rate": 8.787878787878788e-06, |
|
"loss": 0.6873, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 0.27149632573127747, |
|
"learning_rate": 8.181818181818181e-06, |
|
"loss": 0.6823, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.242424242424242, |
|
"grad_norm": 0.7159335017204285, |
|
"learning_rate": 7.5757575757575764e-06, |
|
"loss": 0.7014, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.303030303030303, |
|
"grad_norm": 0.2240850031375885, |
|
"learning_rate": 6.96969696969697e-06, |
|
"loss": 0.6903, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 1.4085216522216797, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.7011, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.4242424242424243, |
|
"grad_norm": 0.6638109087944031, |
|
"learning_rate": 5.757575757575758e-06, |
|
"loss": 0.6842, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.484848484848485, |
|
"grad_norm": 0.7225008606910706, |
|
"learning_rate": 5.151515151515151e-06, |
|
"loss": 0.6887, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 0.23257039487361908, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.6993, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.606060606060606, |
|
"grad_norm": 0.1906505525112152, |
|
"learning_rate": 3.93939393939394e-06, |
|
"loss": 0.6972, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.487804651260376, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.6879, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 0.8791880011558533, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 0.6917, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.787878787878788, |
|
"grad_norm": 0.1857946664094925, |
|
"learning_rate": 2.121212121212121e-06, |
|
"loss": 0.6911, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.8484848484848486, |
|
"grad_norm": 1.3687998056411743, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"loss": 0.6889, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 0.6968662738800049, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 0.6969, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.9696969696969697, |
|
"grad_norm": 0.19741572439670563, |
|
"learning_rate": 3.0303030303030305e-07, |
|
"loss": 0.6969, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5264550264550265, |
|
"eval_loss": 0.6918376684188843, |
|
"eval_runtime": 35.4925, |
|
"eval_samples_per_second": 10.65, |
|
"eval_steps_per_second": 1.352, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 495, |
|
"total_flos": 3.064033269360968e+17, |
|
"train_loss": 0.6929814497629802, |
|
"train_runtime": 561.8993, |
|
"train_samples_per_second": 7.037, |
|
"train_steps_per_second": 0.881 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 495, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.064033269360968e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|