vit-base-food-items-v1 / trainer_state.json
heisenberg3376's picture
Training in progress, step 100
a29d4d3 verified
raw
history blame
12.9 kB
{
"best_metric": 0.33629149198532104,
"best_model_checkpoint": "vit-base-food-items-v1/checkpoint-400",
"epoch": 4.0,
"eval_steps": 100,
"global_step": 608,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06578947368421052,
"grad_norm": 2.054168224334717,
"learning_rate": 0.00019671052631578949,
"loss": 2.2227,
"step": 10
},
{
"epoch": 0.13157894736842105,
"grad_norm": 2.418569326400757,
"learning_rate": 0.00019342105263157894,
"loss": 1.7988,
"step": 20
},
{
"epoch": 0.19736842105263158,
"grad_norm": 2.0799572467803955,
"learning_rate": 0.00019013157894736844,
"loss": 1.3952,
"step": 30
},
{
"epoch": 0.2631578947368421,
"grad_norm": 2.5012855529785156,
"learning_rate": 0.00018684210526315792,
"loss": 1.0071,
"step": 40
},
{
"epoch": 0.32894736842105265,
"grad_norm": 1.610549807548523,
"learning_rate": 0.00018355263157894736,
"loss": 0.8514,
"step": 50
},
{
"epoch": 0.39473684210526316,
"grad_norm": 2.7514488697052,
"learning_rate": 0.00018026315789473684,
"loss": 0.6752,
"step": 60
},
{
"epoch": 0.4605263157894737,
"grad_norm": 5.107870101928711,
"learning_rate": 0.00017697368421052632,
"loss": 0.617,
"step": 70
},
{
"epoch": 0.5263157894736842,
"grad_norm": 1.621307611465454,
"learning_rate": 0.0001736842105263158,
"loss": 0.4383,
"step": 80
},
{
"epoch": 0.5921052631578947,
"grad_norm": 2.050955057144165,
"learning_rate": 0.00017039473684210527,
"loss": 0.4703,
"step": 90
},
{
"epoch": 0.6578947368421053,
"grad_norm": 3.5689868927001953,
"learning_rate": 0.00016710526315789475,
"loss": 0.4195,
"step": 100
},
{
"epoch": 0.6578947368421053,
"eval_accuracy": 0.9054545454545454,
"eval_loss": 0.5027927756309509,
"eval_runtime": 6.6566,
"eval_samples_per_second": 82.625,
"eval_steps_per_second": 10.366,
"step": 100
},
{
"epoch": 0.7236842105263158,
"grad_norm": 2.683819055557251,
"learning_rate": 0.00016381578947368422,
"loss": 0.3666,
"step": 110
},
{
"epoch": 0.7894736842105263,
"grad_norm": 2.7733426094055176,
"learning_rate": 0.0001605263157894737,
"loss": 0.3876,
"step": 120
},
{
"epoch": 0.8552631578947368,
"grad_norm": 3.341937303543091,
"learning_rate": 0.00015723684210526318,
"loss": 0.3778,
"step": 130
},
{
"epoch": 0.9210526315789473,
"grad_norm": 1.0890475511550903,
"learning_rate": 0.00015394736842105265,
"loss": 0.3368,
"step": 140
},
{
"epoch": 0.9868421052631579,
"grad_norm": 3.217635154724121,
"learning_rate": 0.0001506578947368421,
"loss": 0.2434,
"step": 150
},
{
"epoch": 1.0526315789473684,
"grad_norm": 1.1362298727035522,
"learning_rate": 0.00014736842105263158,
"loss": 0.1537,
"step": 160
},
{
"epoch": 1.118421052631579,
"grad_norm": 0.3043310344219208,
"learning_rate": 0.00014407894736842106,
"loss": 0.1786,
"step": 170
},
{
"epoch": 1.1842105263157894,
"grad_norm": 0.36744824051856995,
"learning_rate": 0.00014078947368421053,
"loss": 0.1534,
"step": 180
},
{
"epoch": 1.25,
"grad_norm": 0.4088458716869354,
"learning_rate": 0.0001375,
"loss": 0.1273,
"step": 190
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.22641144692897797,
"learning_rate": 0.00013421052631578948,
"loss": 0.1072,
"step": 200
},
{
"epoch": 1.3157894736842106,
"eval_accuracy": 0.8945454545454545,
"eval_loss": 0.37944725155830383,
"eval_runtime": 6.6833,
"eval_samples_per_second": 82.295,
"eval_steps_per_second": 10.324,
"step": 200
},
{
"epoch": 1.381578947368421,
"grad_norm": 0.14886893332004547,
"learning_rate": 0.00013092105263157893,
"loss": 0.0846,
"step": 210
},
{
"epoch": 1.4473684210526316,
"grad_norm": 0.17389647662639618,
"learning_rate": 0.00012763157894736844,
"loss": 0.0789,
"step": 220
},
{
"epoch": 1.513157894736842,
"grad_norm": 0.12492559105157852,
"learning_rate": 0.00012434210526315791,
"loss": 0.0605,
"step": 230
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.14732375741004944,
"learning_rate": 0.00012105263157894738,
"loss": 0.0867,
"step": 240
},
{
"epoch": 1.6447368421052633,
"grad_norm": 0.1113506406545639,
"learning_rate": 0.00011776315789473684,
"loss": 0.0436,
"step": 250
},
{
"epoch": 1.7105263157894737,
"grad_norm": 0.09813081473112106,
"learning_rate": 0.00011447368421052632,
"loss": 0.0416,
"step": 260
},
{
"epoch": 1.776315789473684,
"grad_norm": 6.826725006103516,
"learning_rate": 0.0001111842105263158,
"loss": 0.0514,
"step": 270
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.10619573295116425,
"learning_rate": 0.00010789473684210527,
"loss": 0.0601,
"step": 280
},
{
"epoch": 1.9078947368421053,
"grad_norm": 0.13959018886089325,
"learning_rate": 0.00010460526315789475,
"loss": 0.0454,
"step": 290
},
{
"epoch": 1.973684210526316,
"grad_norm": 0.08468258380889893,
"learning_rate": 0.00010131578947368421,
"loss": 0.0326,
"step": 300
},
{
"epoch": 1.973684210526316,
"eval_accuracy": 0.9054545454545454,
"eval_loss": 0.38323774933815,
"eval_runtime": 6.0691,
"eval_samples_per_second": 90.622,
"eval_steps_per_second": 11.369,
"step": 300
},
{
"epoch": 2.039473684210526,
"grad_norm": 0.07823757082223892,
"learning_rate": 9.802631578947369e-05,
"loss": 0.0392,
"step": 310
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.07656868547201157,
"learning_rate": 9.473684210526316e-05,
"loss": 0.0288,
"step": 320
},
{
"epoch": 2.1710526315789473,
"grad_norm": 0.07013211399316788,
"learning_rate": 9.144736842105264e-05,
"loss": 0.0313,
"step": 330
},
{
"epoch": 2.236842105263158,
"grad_norm": 0.07913695275783539,
"learning_rate": 8.81578947368421e-05,
"loss": 0.0378,
"step": 340
},
{
"epoch": 2.3026315789473686,
"grad_norm": 0.3869466483592987,
"learning_rate": 8.486842105263159e-05,
"loss": 0.0253,
"step": 350
},
{
"epoch": 2.3684210526315788,
"grad_norm": 0.06490592658519745,
"learning_rate": 8.157894736842105e-05,
"loss": 0.0241,
"step": 360
},
{
"epoch": 2.4342105263157894,
"grad_norm": 0.06631086021661758,
"learning_rate": 7.828947368421053e-05,
"loss": 0.0231,
"step": 370
},
{
"epoch": 2.5,
"grad_norm": 0.05489266291260719,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0218,
"step": 380
},
{
"epoch": 2.5657894736842106,
"grad_norm": 0.07426982372999191,
"learning_rate": 7.171052631578947e-05,
"loss": 0.0215,
"step": 390
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.063384510576725,
"learning_rate": 6.842105263157895e-05,
"loss": 0.0207,
"step": 400
},
{
"epoch": 2.6315789473684212,
"eval_accuracy": 0.9236363636363636,
"eval_loss": 0.33629149198532104,
"eval_runtime": 6.0608,
"eval_samples_per_second": 90.746,
"eval_steps_per_second": 11.385,
"step": 400
},
{
"epoch": 2.6973684210526314,
"grad_norm": 0.05782260745763779,
"learning_rate": 6.513157894736842e-05,
"loss": 0.0201,
"step": 410
},
{
"epoch": 2.763157894736842,
"grad_norm": 0.05535552278161049,
"learning_rate": 6.18421052631579e-05,
"loss": 0.0194,
"step": 420
},
{
"epoch": 2.8289473684210527,
"grad_norm": 0.05756945163011551,
"learning_rate": 5.855263157894737e-05,
"loss": 0.0191,
"step": 430
},
{
"epoch": 2.8947368421052633,
"grad_norm": 0.05671467259526253,
"learning_rate": 5.526315789473685e-05,
"loss": 0.0188,
"step": 440
},
{
"epoch": 2.9605263157894735,
"grad_norm": 0.05619660019874573,
"learning_rate": 5.197368421052632e-05,
"loss": 0.0183,
"step": 450
},
{
"epoch": 3.026315789473684,
"grad_norm": 0.05277419090270996,
"learning_rate": 4.868421052631579e-05,
"loss": 0.0177,
"step": 460
},
{
"epoch": 3.0921052631578947,
"grad_norm": 0.05281645059585571,
"learning_rate": 4.539473684210527e-05,
"loss": 0.0174,
"step": 470
},
{
"epoch": 3.1578947368421053,
"grad_norm": 0.06867770105600357,
"learning_rate": 4.210526315789474e-05,
"loss": 0.017,
"step": 480
},
{
"epoch": 3.223684210526316,
"grad_norm": 0.047292064875364304,
"learning_rate": 3.8815789473684214e-05,
"loss": 0.0168,
"step": 490
},
{
"epoch": 3.2894736842105265,
"grad_norm": 0.043311525136232376,
"learning_rate": 3.5526315789473684e-05,
"loss": 0.0167,
"step": 500
},
{
"epoch": 3.2894736842105265,
"eval_accuracy": 0.9236363636363636,
"eval_loss": 0.33733832836151123,
"eval_runtime": 5.7257,
"eval_samples_per_second": 96.057,
"eval_steps_per_second": 12.051,
"step": 500
},
{
"epoch": 3.3552631578947367,
"grad_norm": 0.04796218127012253,
"learning_rate": 3.223684210526316e-05,
"loss": 0.0165,
"step": 510
},
{
"epoch": 3.4210526315789473,
"grad_norm": 0.048424966633319855,
"learning_rate": 2.8947368421052634e-05,
"loss": 0.0163,
"step": 520
},
{
"epoch": 3.486842105263158,
"grad_norm": 0.046178512275218964,
"learning_rate": 2.565789473684211e-05,
"loss": 0.0157,
"step": 530
},
{
"epoch": 3.5526315789473686,
"grad_norm": 0.04182315245270729,
"learning_rate": 2.236842105263158e-05,
"loss": 0.0156,
"step": 540
},
{
"epoch": 3.6184210526315788,
"grad_norm": 0.04811399057507515,
"learning_rate": 1.9078947368421056e-05,
"loss": 0.0157,
"step": 550
},
{
"epoch": 3.6842105263157894,
"grad_norm": 0.04523231461644173,
"learning_rate": 1.5789473684210526e-05,
"loss": 0.0157,
"step": 560
},
{
"epoch": 3.75,
"grad_norm": 0.04799880087375641,
"learning_rate": 1.25e-05,
"loss": 0.0155,
"step": 570
},
{
"epoch": 3.8157894736842106,
"grad_norm": 0.04668057709932327,
"learning_rate": 9.210526315789474e-06,
"loss": 0.0154,
"step": 580
},
{
"epoch": 3.8815789473684212,
"grad_norm": 0.044472016394138336,
"learning_rate": 5.921052631578948e-06,
"loss": 0.0154,
"step": 590
},
{
"epoch": 3.9473684210526314,
"grad_norm": 0.05030672252178192,
"learning_rate": 2.631578947368421e-06,
"loss": 0.0153,
"step": 600
},
{
"epoch": 3.9473684210526314,
"eval_accuracy": 0.9236363636363636,
"eval_loss": 0.33738574385643005,
"eval_runtime": 6.0053,
"eval_samples_per_second": 91.586,
"eval_steps_per_second": 11.49,
"step": 600
},
{
"epoch": 4.0,
"step": 608,
"total_flos": 7.501829674622976e+17,
"train_loss": 0.22265003621578217,
"train_runtime": 237.6059,
"train_samples_per_second": 40.74,
"train_steps_per_second": 2.559
}
],
"logging_steps": 10,
"max_steps": 608,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.501829674622976e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}