|
{ |
|
"best_metric": 0.33629149198532104, |
|
"best_model_checkpoint": "vit-base-food-items-v1/checkpoint-400", |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 608, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06578947368421052, |
|
"grad_norm": 2.054168224334717, |
|
"learning_rate": 0.00019671052631578949, |
|
"loss": 2.2227, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13157894736842105, |
|
"grad_norm": 2.418569326400757, |
|
"learning_rate": 0.00019342105263157894, |
|
"loss": 1.7988, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19736842105263158, |
|
"grad_norm": 2.0799572467803955, |
|
"learning_rate": 0.00019013157894736844, |
|
"loss": 1.3952, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2631578947368421, |
|
"grad_norm": 2.5012855529785156, |
|
"learning_rate": 0.00018684210526315792, |
|
"loss": 1.0071, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32894736842105265, |
|
"grad_norm": 1.610549807548523, |
|
"learning_rate": 0.00018355263157894736, |
|
"loss": 0.8514, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.39473684210526316, |
|
"grad_norm": 2.7514488697052, |
|
"learning_rate": 0.00018026315789473684, |
|
"loss": 0.6752, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4605263157894737, |
|
"grad_norm": 5.107870101928711, |
|
"learning_rate": 0.00017697368421052632, |
|
"loss": 0.617, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 1.621307611465454, |
|
"learning_rate": 0.0001736842105263158, |
|
"loss": 0.4383, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5921052631578947, |
|
"grad_norm": 2.050955057144165, |
|
"learning_rate": 0.00017039473684210527, |
|
"loss": 0.4703, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6578947368421053, |
|
"grad_norm": 3.5689868927001953, |
|
"learning_rate": 0.00016710526315789475, |
|
"loss": 0.4195, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6578947368421053, |
|
"eval_accuracy": 0.9054545454545454, |
|
"eval_loss": 0.5027927756309509, |
|
"eval_runtime": 6.6566, |
|
"eval_samples_per_second": 82.625, |
|
"eval_steps_per_second": 10.366, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7236842105263158, |
|
"grad_norm": 2.683819055557251, |
|
"learning_rate": 0.00016381578947368422, |
|
"loss": 0.3666, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7894736842105263, |
|
"grad_norm": 2.7733426094055176, |
|
"learning_rate": 0.0001605263157894737, |
|
"loss": 0.3876, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8552631578947368, |
|
"grad_norm": 3.341937303543091, |
|
"learning_rate": 0.00015723684210526318, |
|
"loss": 0.3778, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9210526315789473, |
|
"grad_norm": 1.0890475511550903, |
|
"learning_rate": 0.00015394736842105265, |
|
"loss": 0.3368, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9868421052631579, |
|
"grad_norm": 3.217635154724121, |
|
"learning_rate": 0.0001506578947368421, |
|
"loss": 0.2434, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 1.1362298727035522, |
|
"learning_rate": 0.00014736842105263158, |
|
"loss": 0.1537, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.118421052631579, |
|
"grad_norm": 0.3043310344219208, |
|
"learning_rate": 0.00014407894736842106, |
|
"loss": 0.1786, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1842105263157894, |
|
"grad_norm": 0.36744824051856995, |
|
"learning_rate": 0.00014078947368421053, |
|
"loss": 0.1534, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.4088458716869354, |
|
"learning_rate": 0.0001375, |
|
"loss": 0.1273, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.3157894736842106, |
|
"grad_norm": 0.22641144692897797, |
|
"learning_rate": 0.00013421052631578948, |
|
"loss": 0.1072, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3157894736842106, |
|
"eval_accuracy": 0.8945454545454545, |
|
"eval_loss": 0.37944725155830383, |
|
"eval_runtime": 6.6833, |
|
"eval_samples_per_second": 82.295, |
|
"eval_steps_per_second": 10.324, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.381578947368421, |
|
"grad_norm": 0.14886893332004547, |
|
"learning_rate": 0.00013092105263157893, |
|
"loss": 0.0846, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4473684210526316, |
|
"grad_norm": 0.17389647662639618, |
|
"learning_rate": 0.00012763157894736844, |
|
"loss": 0.0789, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.513157894736842, |
|
"grad_norm": 0.12492559105157852, |
|
"learning_rate": 0.00012434210526315791, |
|
"loss": 0.0605, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.14732375741004944, |
|
"learning_rate": 0.00012105263157894738, |
|
"loss": 0.0867, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.6447368421052633, |
|
"grad_norm": 0.1113506406545639, |
|
"learning_rate": 0.00011776315789473684, |
|
"loss": 0.0436, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.7105263157894737, |
|
"grad_norm": 0.09813081473112106, |
|
"learning_rate": 0.00011447368421052632, |
|
"loss": 0.0416, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.776315789473684, |
|
"grad_norm": 6.826725006103516, |
|
"learning_rate": 0.0001111842105263158, |
|
"loss": 0.0514, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.8421052631578947, |
|
"grad_norm": 0.10619573295116425, |
|
"learning_rate": 0.00010789473684210527, |
|
"loss": 0.0601, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.9078947368421053, |
|
"grad_norm": 0.13959018886089325, |
|
"learning_rate": 0.00010460526315789475, |
|
"loss": 0.0454, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.973684210526316, |
|
"grad_norm": 0.08468258380889893, |
|
"learning_rate": 0.00010131578947368421, |
|
"loss": 0.0326, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.973684210526316, |
|
"eval_accuracy": 0.9054545454545454, |
|
"eval_loss": 0.38323774933815, |
|
"eval_runtime": 6.0691, |
|
"eval_samples_per_second": 90.622, |
|
"eval_steps_per_second": 11.369, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.039473684210526, |
|
"grad_norm": 0.07823757082223892, |
|
"learning_rate": 9.802631578947369e-05, |
|
"loss": 0.0392, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.07656868547201157, |
|
"learning_rate": 9.473684210526316e-05, |
|
"loss": 0.0288, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.1710526315789473, |
|
"grad_norm": 0.07013211399316788, |
|
"learning_rate": 9.144736842105264e-05, |
|
"loss": 0.0313, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.236842105263158, |
|
"grad_norm": 0.07913695275783539, |
|
"learning_rate": 8.81578947368421e-05, |
|
"loss": 0.0378, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.3026315789473686, |
|
"grad_norm": 0.3869466483592987, |
|
"learning_rate": 8.486842105263159e-05, |
|
"loss": 0.0253, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.3684210526315788, |
|
"grad_norm": 0.06490592658519745, |
|
"learning_rate": 8.157894736842105e-05, |
|
"loss": 0.0241, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.4342105263157894, |
|
"grad_norm": 0.06631086021661758, |
|
"learning_rate": 7.828947368421053e-05, |
|
"loss": 0.0231, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.05489266291260719, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0218, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.5657894736842106, |
|
"grad_norm": 0.07426982372999191, |
|
"learning_rate": 7.171052631578947e-05, |
|
"loss": 0.0215, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.063384510576725, |
|
"learning_rate": 6.842105263157895e-05, |
|
"loss": 0.0207, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"eval_accuracy": 0.9236363636363636, |
|
"eval_loss": 0.33629149198532104, |
|
"eval_runtime": 6.0608, |
|
"eval_samples_per_second": 90.746, |
|
"eval_steps_per_second": 11.385, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.6973684210526314, |
|
"grad_norm": 0.05782260745763779, |
|
"learning_rate": 6.513157894736842e-05, |
|
"loss": 0.0201, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.763157894736842, |
|
"grad_norm": 0.05535552278161049, |
|
"learning_rate": 6.18421052631579e-05, |
|
"loss": 0.0194, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.8289473684210527, |
|
"grad_norm": 0.05756945163011551, |
|
"learning_rate": 5.855263157894737e-05, |
|
"loss": 0.0191, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.8947368421052633, |
|
"grad_norm": 0.05671467259526253, |
|
"learning_rate": 5.526315789473685e-05, |
|
"loss": 0.0188, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.9605263157894735, |
|
"grad_norm": 0.05619660019874573, |
|
"learning_rate": 5.197368421052632e-05, |
|
"loss": 0.0183, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.026315789473684, |
|
"grad_norm": 0.05277419090270996, |
|
"learning_rate": 4.868421052631579e-05, |
|
"loss": 0.0177, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.0921052631578947, |
|
"grad_norm": 0.05281645059585571, |
|
"learning_rate": 4.539473684210527e-05, |
|
"loss": 0.0174, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 0.06867770105600357, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 0.017, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.223684210526316, |
|
"grad_norm": 0.047292064875364304, |
|
"learning_rate": 3.8815789473684214e-05, |
|
"loss": 0.0168, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.2894736842105265, |
|
"grad_norm": 0.043311525136232376, |
|
"learning_rate": 3.5526315789473684e-05, |
|
"loss": 0.0167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.2894736842105265, |
|
"eval_accuracy": 0.9236363636363636, |
|
"eval_loss": 0.33733832836151123, |
|
"eval_runtime": 5.7257, |
|
"eval_samples_per_second": 96.057, |
|
"eval_steps_per_second": 12.051, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.3552631578947367, |
|
"grad_norm": 0.04796218127012253, |
|
"learning_rate": 3.223684210526316e-05, |
|
"loss": 0.0165, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.4210526315789473, |
|
"grad_norm": 0.048424966633319855, |
|
"learning_rate": 2.8947368421052634e-05, |
|
"loss": 0.0163, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.486842105263158, |
|
"grad_norm": 0.046178512275218964, |
|
"learning_rate": 2.565789473684211e-05, |
|
"loss": 0.0157, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.5526315789473686, |
|
"grad_norm": 0.04182315245270729, |
|
"learning_rate": 2.236842105263158e-05, |
|
"loss": 0.0156, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.6184210526315788, |
|
"grad_norm": 0.04811399057507515, |
|
"learning_rate": 1.9078947368421056e-05, |
|
"loss": 0.0157, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.6842105263157894, |
|
"grad_norm": 0.04523231461644173, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 0.0157, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.04799880087375641, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0155, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.8157894736842106, |
|
"grad_norm": 0.04668057709932327, |
|
"learning_rate": 9.210526315789474e-06, |
|
"loss": 0.0154, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.8815789473684212, |
|
"grad_norm": 0.044472016394138336, |
|
"learning_rate": 5.921052631578948e-06, |
|
"loss": 0.0154, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.9473684210526314, |
|
"grad_norm": 0.05030672252178192, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 0.0153, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.9473684210526314, |
|
"eval_accuracy": 0.9236363636363636, |
|
"eval_loss": 0.33738574385643005, |
|
"eval_runtime": 6.0053, |
|
"eval_samples_per_second": 91.586, |
|
"eval_steps_per_second": 11.49, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 608, |
|
"total_flos": 7.501829674622976e+17, |
|
"train_loss": 0.22265003621578217, |
|
"train_runtime": 237.6059, |
|
"train_samples_per_second": 40.74, |
|
"train_steps_per_second": 2.559 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 608, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.501829674622976e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|