Image-classification / trainer_state.json
ILT37's picture
Upload 8 files
9ac98af verified
{
"best_metric": 0.9142091152815014,
"best_model_checkpoint": "pokemon_models\\checkpoint-1610",
"epoch": 23.0,
"eval_steps": 500,
"global_step": 1610,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.29,
"learning_rate": 5e-06,
"loss": 5.0145,
"step": 20
},
{
"epoch": 0.57,
"learning_rate": 1e-05,
"loss": 5.0039,
"step": 40
},
{
"epoch": 0.86,
"learning_rate": 1.5e-05,
"loss": 4.9942,
"step": 60
},
{
"epoch": 1.0,
"eval_accuracy": 0.01876675603217158,
"eval_loss": 4.973499298095703,
"eval_runtime": 102.0829,
"eval_samples_per_second": 10.962,
"eval_steps_per_second": 0.686,
"step": 70
},
{
"epoch": 1.14,
"learning_rate": 2e-05,
"loss": 4.97,
"step": 80
},
{
"epoch": 1.43,
"learning_rate": 2.5e-05,
"loss": 4.9313,
"step": 100
},
{
"epoch": 1.71,
"learning_rate": 3e-05,
"loss": 4.893,
"step": 120
},
{
"epoch": 2.0,
"learning_rate": 3.5e-05,
"loss": 4.8374,
"step": 140
},
{
"epoch": 2.0,
"eval_accuracy": 0.20196604110813227,
"eval_loss": 4.816006660461426,
"eval_runtime": 124.2897,
"eval_samples_per_second": 9.003,
"eval_steps_per_second": 0.563,
"step": 140
},
{
"epoch": 2.29,
"learning_rate": 4e-05,
"loss": 4.7329,
"step": 160
},
{
"epoch": 2.57,
"learning_rate": 4.5e-05,
"loss": 4.6472,
"step": 180
},
{
"epoch": 2.86,
"learning_rate": 5e-05,
"loss": 4.541,
"step": 200
},
{
"epoch": 3.0,
"eval_accuracy": 0.5495978552278821,
"eval_loss": 4.4448018074035645,
"eval_runtime": 101.0357,
"eval_samples_per_second": 11.075,
"eval_steps_per_second": 0.693,
"step": 210
},
{
"epoch": 3.14,
"learning_rate": 4.9444444444444446e-05,
"loss": 4.4117,
"step": 220
},
{
"epoch": 3.43,
"learning_rate": 4.888888888888889e-05,
"loss": 4.2454,
"step": 240
},
{
"epoch": 3.71,
"learning_rate": 4.8333333333333334e-05,
"loss": 4.1227,
"step": 260
},
{
"epoch": 4.0,
"learning_rate": 4.7777777777777784e-05,
"loss": 4.0198,
"step": 280
},
{
"epoch": 4.0,
"eval_accuracy": 0.7042001787310098,
"eval_loss": 4.0061211585998535,
"eval_runtime": 100.1956,
"eval_samples_per_second": 11.168,
"eval_steps_per_second": 0.699,
"step": 280
},
{
"epoch": 4.29,
"learning_rate": 4.722222222222222e-05,
"loss": 3.84,
"step": 300
},
{
"epoch": 4.57,
"learning_rate": 4.666666666666667e-05,
"loss": 3.757,
"step": 320
},
{
"epoch": 4.86,
"learning_rate": 4.6111111111111115e-05,
"loss": 3.6626,
"step": 340
},
{
"epoch": 5.0,
"eval_accuracy": 0.7605004468275246,
"eval_loss": 3.630556106567383,
"eval_runtime": 100.0509,
"eval_samples_per_second": 11.184,
"eval_steps_per_second": 0.7,
"step": 350
},
{
"epoch": 5.14,
"learning_rate": 4.555555555555556e-05,
"loss": 3.5477,
"step": 360
},
{
"epoch": 5.43,
"learning_rate": 4.5e-05,
"loss": 3.3914,
"step": 380
},
{
"epoch": 5.71,
"learning_rate": 4.4444444444444447e-05,
"loss": 3.3164,
"step": 400
},
{
"epoch": 6.0,
"learning_rate": 4.388888888888889e-05,
"loss": 3.2654,
"step": 420
},
{
"epoch": 6.0,
"eval_accuracy": 0.7971403038427167,
"eval_loss": 3.3061511516571045,
"eval_runtime": 99.8013,
"eval_samples_per_second": 11.212,
"eval_steps_per_second": 0.701,
"step": 420
},
{
"epoch": 6.29,
"learning_rate": 4.3333333333333334e-05,
"loss": 3.1041,
"step": 440
},
{
"epoch": 6.57,
"learning_rate": 4.277777777777778e-05,
"loss": 3.0193,
"step": 460
},
{
"epoch": 6.86,
"learning_rate": 4.222222222222222e-05,
"loss": 2.9314,
"step": 480
},
{
"epoch": 7.0,
"eval_accuracy": 0.8310991957104558,
"eval_loss": 2.994609832763672,
"eval_runtime": 106.5638,
"eval_samples_per_second": 10.501,
"eval_steps_per_second": 0.657,
"step": 490
},
{
"epoch": 7.14,
"learning_rate": 4.166666666666667e-05,
"loss": 2.871,
"step": 500
},
{
"epoch": 7.43,
"learning_rate": 4.111111111111111e-05,
"loss": 2.7418,
"step": 520
},
{
"epoch": 7.71,
"learning_rate": 4.055555555555556e-05,
"loss": 2.6542,
"step": 540
},
{
"epoch": 8.0,
"learning_rate": 4e-05,
"loss": 2.5893,
"step": 560
},
{
"epoch": 8.0,
"eval_accuracy": 0.8507596067917784,
"eval_loss": 2.7318336963653564,
"eval_runtime": 125.4233,
"eval_samples_per_second": 8.922,
"eval_steps_per_second": 0.558,
"step": 560
},
{
"epoch": 8.29,
"learning_rate": 3.944444444444445e-05,
"loss": 2.5106,
"step": 580
},
{
"epoch": 8.57,
"learning_rate": 3.888888888888889e-05,
"loss": 2.4358,
"step": 600
},
{
"epoch": 8.86,
"learning_rate": 3.8333333333333334e-05,
"loss": 2.3645,
"step": 620
},
{
"epoch": 9.0,
"eval_accuracy": 0.8579088471849866,
"eval_loss": 2.4826338291168213,
"eval_runtime": 121.4568,
"eval_samples_per_second": 9.213,
"eval_steps_per_second": 0.576,
"step": 630
},
{
"epoch": 9.14,
"learning_rate": 3.777777777777778e-05,
"loss": 2.2831,
"step": 640
},
{
"epoch": 9.43,
"learning_rate": 3.722222222222222e-05,
"loss": 2.2297,
"step": 660
},
{
"epoch": 9.71,
"learning_rate": 3.6666666666666666e-05,
"loss": 2.1367,
"step": 680
},
{
"epoch": 10.0,
"learning_rate": 3.611111111111111e-05,
"loss": 2.0793,
"step": 700
},
{
"epoch": 10.0,
"eval_accuracy": 0.871313672922252,
"eval_loss": 2.245124578475952,
"eval_runtime": 122.6079,
"eval_samples_per_second": 9.127,
"eval_steps_per_second": 0.571,
"step": 700
},
{
"epoch": 10.29,
"learning_rate": 3.555555555555556e-05,
"loss": 1.9796,
"step": 720
},
{
"epoch": 10.57,
"learning_rate": 3.5e-05,
"loss": 1.9471,
"step": 740
},
{
"epoch": 10.86,
"learning_rate": 3.444444444444445e-05,
"loss": 1.8754,
"step": 760
},
{
"epoch": 11.0,
"eval_accuracy": 0.871313672922252,
"eval_loss": 2.060222625732422,
"eval_runtime": 122.2722,
"eval_samples_per_second": 9.152,
"eval_steps_per_second": 0.572,
"step": 770
},
{
"epoch": 11.14,
"learning_rate": 3.388888888888889e-05,
"loss": 1.8259,
"step": 780
},
{
"epoch": 11.43,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.7872,
"step": 800
},
{
"epoch": 11.71,
"learning_rate": 3.277777777777778e-05,
"loss": 1.6884,
"step": 820
},
{
"epoch": 12.0,
"learning_rate": 3.222222222222223e-05,
"loss": 1.6703,
"step": 840
},
{
"epoch": 12.0,
"eval_accuracy": 0.8811438784629133,
"eval_loss": 1.872039556503296,
"eval_runtime": 98.0421,
"eval_samples_per_second": 11.413,
"eval_steps_per_second": 0.714,
"step": 840
},
{
"epoch": 12.29,
"learning_rate": 3.1666666666666666e-05,
"loss": 1.6003,
"step": 860
},
{
"epoch": 12.57,
"learning_rate": 3.111111111111111e-05,
"loss": 1.5433,
"step": 880
},
{
"epoch": 12.86,
"learning_rate": 3.055555555555556e-05,
"loss": 1.5198,
"step": 900
},
{
"epoch": 13.0,
"eval_accuracy": 0.8900804289544236,
"eval_loss": 1.7361352443695068,
"eval_runtime": 97.0673,
"eval_samples_per_second": 11.528,
"eval_steps_per_second": 0.721,
"step": 910
},
{
"epoch": 13.14,
"learning_rate": 3e-05,
"loss": 1.4742,
"step": 920
},
{
"epoch": 13.43,
"learning_rate": 2.9444444444444448e-05,
"loss": 1.3876,
"step": 940
},
{
"epoch": 13.71,
"learning_rate": 2.8888888888888888e-05,
"loss": 1.3603,
"step": 960
},
{
"epoch": 14.0,
"learning_rate": 2.8333333333333335e-05,
"loss": 1.329,
"step": 980
},
{
"epoch": 14.0,
"eval_accuracy": 0.900804289544236,
"eval_loss": 1.563855528831482,
"eval_runtime": 97.4399,
"eval_samples_per_second": 11.484,
"eval_steps_per_second": 0.718,
"step": 980
},
{
"epoch": 14.29,
"learning_rate": 2.777777777777778e-05,
"loss": 1.2523,
"step": 1000
},
{
"epoch": 14.57,
"learning_rate": 2.7222222222222223e-05,
"loss": 1.2747,
"step": 1020
},
{
"epoch": 14.86,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.203,
"step": 1040
},
{
"epoch": 15.0,
"eval_accuracy": 0.8927613941018767,
"eval_loss": 1.4685680866241455,
"eval_runtime": 96.9819,
"eval_samples_per_second": 11.538,
"eval_steps_per_second": 0.722,
"step": 1050
},
{
"epoch": 15.14,
"learning_rate": 2.6111111111111114e-05,
"loss": 1.1697,
"step": 1060
},
{
"epoch": 15.43,
"learning_rate": 2.5555555555555554e-05,
"loss": 1.0943,
"step": 1080
},
{
"epoch": 15.71,
"learning_rate": 2.5e-05,
"loss": 1.0947,
"step": 1100
},
{
"epoch": 16.0,
"learning_rate": 2.4444444444444445e-05,
"loss": 1.104,
"step": 1120
},
{
"epoch": 16.0,
"eval_accuracy": 0.8981233243967829,
"eval_loss": 1.3596620559692383,
"eval_runtime": 97.1177,
"eval_samples_per_second": 11.522,
"eval_steps_per_second": 0.721,
"step": 1120
},
{
"epoch": 16.29,
"learning_rate": 2.3888888888888892e-05,
"loss": 1.0113,
"step": 1140
},
{
"epoch": 16.57,
"learning_rate": 2.3333333333333336e-05,
"loss": 1.0285,
"step": 1160
},
{
"epoch": 16.86,
"learning_rate": 2.277777777777778e-05,
"loss": 0.9682,
"step": 1180
},
{
"epoch": 17.0,
"eval_accuracy": 0.8990169794459338,
"eval_loss": 1.2199994325637817,
"eval_runtime": 486.7671,
"eval_samples_per_second": 2.299,
"eval_steps_per_second": 0.144,
"step": 1190
},
{
"epoch": 17.14,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.9578,
"step": 1200
},
{
"epoch": 17.43,
"learning_rate": 2.1666666666666667e-05,
"loss": 0.9403,
"step": 1220
},
{
"epoch": 17.71,
"learning_rate": 2.111111111111111e-05,
"loss": 0.8924,
"step": 1240
},
{
"epoch": 18.0,
"learning_rate": 2.0555555555555555e-05,
"loss": 0.872,
"step": 1260
},
{
"epoch": 18.0,
"eval_accuracy": 0.903485254691689,
"eval_loss": 1.1389293670654297,
"eval_runtime": 110.8112,
"eval_samples_per_second": 10.098,
"eval_steps_per_second": 0.632,
"step": 1260
},
{
"epoch": 18.29,
"learning_rate": 2e-05,
"loss": 0.8312,
"step": 1280
},
{
"epoch": 18.57,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.8201,
"step": 1300
},
{
"epoch": 18.86,
"learning_rate": 1.888888888888889e-05,
"loss": 0.844,
"step": 1320
},
{
"epoch": 19.0,
"eval_accuracy": 0.9124218051831993,
"eval_loss": 1.0643764734268188,
"eval_runtime": 109.2391,
"eval_samples_per_second": 10.244,
"eval_steps_per_second": 0.641,
"step": 1330
},
{
"epoch": 19.14,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.8116,
"step": 1340
},
{
"epoch": 19.43,
"learning_rate": 1.777777777777778e-05,
"loss": 0.7649,
"step": 1360
},
{
"epoch": 19.71,
"learning_rate": 1.7222222222222224e-05,
"loss": 0.7402,
"step": 1380
},
{
"epoch": 20.0,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.7605,
"step": 1400
},
{
"epoch": 20.0,
"eval_accuracy": 0.9088471849865952,
"eval_loss": 1.0364218950271606,
"eval_runtime": 108.8495,
"eval_samples_per_second": 10.28,
"eval_steps_per_second": 0.643,
"step": 1400
},
{
"epoch": 20.29,
"learning_rate": 1.6111111111111115e-05,
"loss": 0.7156,
"step": 1420
},
{
"epoch": 20.57,
"learning_rate": 1.5555555555555555e-05,
"loss": 0.7109,
"step": 1440
},
{
"epoch": 20.86,
"learning_rate": 1.5e-05,
"loss": 0.7244,
"step": 1460
},
{
"epoch": 21.0,
"eval_accuracy": 0.902591599642538,
"eval_loss": 0.9655722379684448,
"eval_runtime": 106.989,
"eval_samples_per_second": 10.459,
"eval_steps_per_second": 0.654,
"step": 1470
},
{
"epoch": 21.14,
"learning_rate": 1.4444444444444444e-05,
"loss": 0.6925,
"step": 1480
},
{
"epoch": 21.43,
"learning_rate": 1.388888888888889e-05,
"loss": 0.6687,
"step": 1500
},
{
"epoch": 21.71,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.658,
"step": 1520
},
{
"epoch": 22.0,
"learning_rate": 1.2777777777777777e-05,
"loss": 0.6595,
"step": 1540
},
{
"epoch": 22.0,
"eval_accuracy": 0.9133154602323503,
"eval_loss": 0.9125866889953613,
"eval_runtime": 106.9609,
"eval_samples_per_second": 10.462,
"eval_steps_per_second": 0.654,
"step": 1540
},
{
"epoch": 22.29,
"learning_rate": 1.2222222222222222e-05,
"loss": 0.6489,
"step": 1560
},
{
"epoch": 22.57,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.6666,
"step": 1580
},
{
"epoch": 22.86,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.6188,
"step": 1600
},
{
"epoch": 23.0,
"eval_accuracy": 0.9142091152815014,
"eval_loss": 0.8716733455657959,
"eval_runtime": 107.8489,
"eval_samples_per_second": 10.376,
"eval_steps_per_second": 0.649,
"step": 1610
}
],
"logging_steps": 20,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 29,
"save_steps": 500,
"total_flos": 7.982873471516332e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}