{
  "best_metric": 0.6857386848847139,
  "best_model_checkpoint": "vet-sm/checkpoint-1035",
  "epoch": 4.9879518072289155,
  "eval_steps": 500,
  "global_step": 1035,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 1.454635500907898,
      "learning_rate": 4.807692307692308e-06,
      "loss": 2.0699,
      "step": 10
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6190325021743774,
      "learning_rate": 9.615384615384616e-06,
      "loss": 2.0402,
      "step": 20
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.361482858657837,
      "learning_rate": 1.4423076923076923e-05,
      "loss": 1.9857,
      "step": 30
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3599398136138916,
      "learning_rate": 1.923076923076923e-05,
      "loss": 1.9319,
      "step": 40
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6369996070861816,
      "learning_rate": 2.4038461538461542e-05,
      "loss": 1.8814,
      "step": 50
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.192236065864563,
      "learning_rate": 2.8846153846153845e-05,
      "loss": 1.8459,
      "step": 60
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7909432649612427,
      "learning_rate": 3.365384615384616e-05,
      "loss": 1.8128,
      "step": 70
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8163617849349976,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.7424,
      "step": 80
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.151453733444214,
      "learning_rate": 4.326923076923077e-05,
      "loss": 1.6964,
      "step": 90
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8711392879486084,
      "learning_rate": 4.8076923076923084e-05,
      "loss": 1.6367,
      "step": 100
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7750871181488037,
      "learning_rate": 4.967776584317938e-05,
      "loss": 1.6045,
      "step": 110
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7674392461776733,
      "learning_rate": 4.9140708915145005e-05,
      "loss": 1.626,
      "step": 120
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8520715236663818,
      "learning_rate": 4.860365198711064e-05,
      "loss": 1.5904,
      "step": 130
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2221274375915527,
      "learning_rate": 4.806659505907626e-05,
      "loss": 1.5845,
      "step": 140
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5263822078704834,
      "learning_rate": 4.7529538131041896e-05,
      "loss": 1.5236,
      "step": 150
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.3485989570617676,
      "learning_rate": 4.699248120300752e-05,
      "loss": 1.4698,
      "step": 160
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.506840944290161,
      "learning_rate": 4.645542427497315e-05,
      "loss": 1.4167,
      "step": 170
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.2644572257995605,
      "learning_rate": 4.591836734693878e-05,
      "loss": 1.4426,
      "step": 180
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.8360097408294678,
      "learning_rate": 4.5381310418904406e-05,
      "loss": 1.3924,
      "step": 190
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6382710933685303,
      "learning_rate": 4.484425349087004e-05,
      "loss": 1.3437,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5456874466268147,
      "eval_loss": 1.3443350791931152,
      "eval_runtime": 402.2174,
      "eval_samples_per_second": 2.911,
      "eval_steps_per_second": 0.184,
      "step": 207
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.3896210193634033,
      "learning_rate": 4.4307196562835664e-05,
      "loss": 1.413,
      "step": 210
    },
    {
      "epoch": 1.06,
      "grad_norm": 6.438076972961426,
      "learning_rate": 4.3770139634801297e-05,
      "loss": 1.3462,
      "step": 220
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.4236090183258057,
      "learning_rate": 4.323308270676692e-05,
      "loss": 1.2821,
      "step": 230
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8709990978240967,
      "learning_rate": 4.269602577873255e-05,
      "loss": 1.2731,
      "step": 240
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8092925548553467,
      "learning_rate": 4.215896885069818e-05,
      "loss": 1.2207,
      "step": 250
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.147840976715088,
      "learning_rate": 4.1621911922663806e-05,
      "loss": 1.2131,
      "step": 260
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.0452332496643066,
      "learning_rate": 4.108485499462943e-05,
      "loss": 1.1956,
      "step": 270
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.2367162704467773,
      "learning_rate": 4.054779806659506e-05,
      "loss": 1.1639,
      "step": 280
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.8542873859405518,
      "learning_rate": 4.0010741138560684e-05,
      "loss": 1.1577,
      "step": 290
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.901261568069458,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 1.1535,
      "step": 300
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.736692905426025,
      "learning_rate": 3.893662728249194e-05,
      "loss": 1.1863,
      "step": 310
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.820009708404541,
      "learning_rate": 3.8399570354457575e-05,
      "loss": 1.2104,
      "step": 320
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.279903888702393,
      "learning_rate": 3.78625134264232e-05,
      "loss": 1.162,
      "step": 330
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.341091156005859,
      "learning_rate": 3.732545649838883e-05,
      "loss": 1.1758,
      "step": 340
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.7722089290618896,
      "learning_rate": 3.678839957035446e-05,
      "loss": 1.153,
      "step": 350
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.462113857269287,
      "learning_rate": 3.6251342642320084e-05,
      "loss": 1.1653,
      "step": 360
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2860846519470215,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.0895,
      "step": 370
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.360082626342773,
      "learning_rate": 3.517722878625134e-05,
      "loss": 1.0896,
      "step": 380
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.952023506164551,
      "learning_rate": 3.4640171858216975e-05,
      "loss": 1.1883,
      "step": 390
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.6793742179870605,
      "learning_rate": 3.41031149301826e-05,
      "loss": 1.0039,
      "step": 400
    },
    {
      "epoch": 1.98,
      "grad_norm": 5.783022403717041,
      "learning_rate": 3.3566058002148234e-05,
      "loss": 1.0892,
      "step": 410
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.627668659265585,
      "eval_loss": 1.0833462476730347,
      "eval_runtime": 386.7346,
      "eval_samples_per_second": 3.028,
      "eval_steps_per_second": 0.191,
      "step": 415
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7940785884857178,
      "learning_rate": 3.302900107411386e-05,
      "loss": 1.0866,
      "step": 420
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.254930257797241,
      "learning_rate": 3.2491944146079485e-05,
      "loss": 0.9175,
      "step": 430
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.631977558135986,
      "learning_rate": 3.195488721804512e-05,
      "loss": 0.8898,
      "step": 440
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.229618549346924,
      "learning_rate": 3.1417830290010743e-05,
      "loss": 0.9022,
      "step": 450
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8936877250671387,
      "learning_rate": 3.0880773361976376e-05,
      "loss": 0.902,
      "step": 460
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5928826332092285,
      "learning_rate": 3.0343716433942e-05,
      "loss": 0.8681,
      "step": 470
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.8170666694641113,
      "learning_rate": 2.980665950590763e-05,
      "loss": 0.7539,
      "step": 480
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.207719326019287,
      "learning_rate": 2.9269602577873257e-05,
      "loss": 0.9404,
      "step": 490
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.372262001037598,
      "learning_rate": 2.8732545649838882e-05,
      "loss": 0.8323,
      "step": 500
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.851659297943115,
      "learning_rate": 2.8195488721804515e-05,
      "loss": 0.8612,
      "step": 510
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.7718682289123535,
      "learning_rate": 2.765843179377014e-05,
      "loss": 0.8427,
      "step": 520
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.901302337646484,
      "learning_rate": 2.712137486573577e-05,
      "loss": 0.8489,
      "step": 530
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.132293224334717,
      "learning_rate": 2.6584317937701396e-05,
      "loss": 0.8522,
      "step": 540
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.27319860458374,
      "learning_rate": 2.6047261009667025e-05,
      "loss": 0.8314,
      "step": 550
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.610922336578369,
      "learning_rate": 2.5510204081632654e-05,
      "loss": 0.8804,
      "step": 560
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8495535850524902,
      "learning_rate": 2.4973147153598283e-05,
      "loss": 0.7591,
      "step": 570
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.183429718017578,
      "learning_rate": 2.443609022556391e-05,
      "loss": 0.8702,
      "step": 580
    },
    {
      "epoch": 2.84,
      "grad_norm": 5.251755237579346,
      "learning_rate": 2.3899033297529538e-05,
      "loss": 0.8275,
      "step": 590
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.696440696716309,
      "learning_rate": 2.3361976369495167e-05,
      "loss": 0.8131,
      "step": 600
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.233407735824585,
      "learning_rate": 2.2824919441460796e-05,
      "loss": 0.7995,
      "step": 610
    },
    {
      "epoch": 2.99,
      "grad_norm": 6.206390857696533,
      "learning_rate": 2.2287862513426426e-05,
      "loss": 0.883,
      "step": 620
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6567036720751495,
      "eval_loss": 0.99440997838974,
      "eval_runtime": 391.9131,
      "eval_samples_per_second": 2.988,
      "eval_steps_per_second": 0.189,
      "step": 622
    },
    {
      "epoch": 3.04,
      "grad_norm": 3.8997249603271484,
      "learning_rate": 2.1750805585392055e-05,
      "loss": 0.7696,
      "step": 630
    },
    {
      "epoch": 3.08,
      "grad_norm": 3.5104353427886963,
      "learning_rate": 2.121374865735768e-05,
      "loss": 0.6644,
      "step": 640
    },
    {
      "epoch": 3.13,
      "grad_norm": 4.623937129974365,
      "learning_rate": 2.067669172932331e-05,
      "loss": 0.7019,
      "step": 650
    },
    {
      "epoch": 3.18,
      "grad_norm": 3.191859722137451,
      "learning_rate": 2.0139634801288935e-05,
      "loss": 0.6541,
      "step": 660
    },
    {
      "epoch": 3.23,
      "grad_norm": 3.7715423107147217,
      "learning_rate": 1.9602577873254565e-05,
      "loss": 0.5893,
      "step": 670
    },
    {
      "epoch": 3.28,
      "grad_norm": 3.1906309127807617,
      "learning_rate": 1.9065520945220194e-05,
      "loss": 0.5868,
      "step": 680
    },
    {
      "epoch": 3.33,
      "grad_norm": 6.5846099853515625,
      "learning_rate": 1.8528464017185823e-05,
      "loss": 0.6708,
      "step": 690
    },
    {
      "epoch": 3.37,
      "grad_norm": 3.970404863357544,
      "learning_rate": 1.7991407089151452e-05,
      "loss": 0.6117,
      "step": 700
    },
    {
      "epoch": 3.42,
      "grad_norm": 4.3821845054626465,
      "learning_rate": 1.7454350161117078e-05,
      "loss": 0.6104,
      "step": 710
    },
    {
      "epoch": 3.47,
      "grad_norm": 5.36595344543457,
      "learning_rate": 1.6917293233082707e-05,
      "loss": 0.5853,
      "step": 720
    },
    {
      "epoch": 3.52,
      "grad_norm": 4.7466301918029785,
      "learning_rate": 1.6380236305048336e-05,
      "loss": 0.5884,
      "step": 730
    },
    {
      "epoch": 3.57,
      "grad_norm": 3.9577629566192627,
      "learning_rate": 1.5843179377013965e-05,
      "loss": 0.5961,
      "step": 740
    },
    {
      "epoch": 3.61,
      "grad_norm": 5.096789360046387,
      "learning_rate": 1.5306122448979594e-05,
      "loss": 0.6155,
      "step": 750
    },
    {
      "epoch": 3.66,
      "grad_norm": 6.176750183105469,
      "learning_rate": 1.4769065520945222e-05,
      "loss": 0.5,
      "step": 760
    },
    {
      "epoch": 3.71,
      "grad_norm": 3.7734062671661377,
      "learning_rate": 1.4232008592910851e-05,
      "loss": 0.5895,
      "step": 770
    },
    {
      "epoch": 3.76,
      "grad_norm": 3.7732903957366943,
      "learning_rate": 1.3694951664876477e-05,
      "loss": 0.5221,
      "step": 780
    },
    {
      "epoch": 3.81,
      "grad_norm": 5.260674953460693,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.6303,
      "step": 790
    },
    {
      "epoch": 3.86,
      "grad_norm": 4.469279766082764,
      "learning_rate": 1.2620837808807733e-05,
      "loss": 0.6151,
      "step": 800
    },
    {
      "epoch": 3.9,
      "grad_norm": 4.325742244720459,
      "learning_rate": 1.2083780880773363e-05,
      "loss": 0.5958,
      "step": 810
    },
    {
      "epoch": 3.95,
      "grad_norm": 3.5742740631103516,
      "learning_rate": 1.1546723952738992e-05,
      "loss": 0.5177,
      "step": 820
    },
    {
      "epoch": 4.0,
      "grad_norm": 6.369570732116699,
      "learning_rate": 1.100966702470462e-05,
      "loss": 0.5199,
      "step": 830
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6754910333048676,
      "eval_loss": 0.9295239448547363,
      "eval_runtime": 398.874,
      "eval_samples_per_second": 2.936,
      "eval_steps_per_second": 0.186,
      "step": 830
    },
    {
      "epoch": 4.05,
      "grad_norm": 4.601716041564941,
      "learning_rate": 1.0472610096670248e-05,
      "loss": 0.4864,
      "step": 840
    },
    {
      "epoch": 4.1,
      "grad_norm": 2.6591596603393555,
      "learning_rate": 9.935553168635876e-06,
      "loss": 0.4057,
      "step": 850
    },
    {
      "epoch": 4.14,
      "grad_norm": 4.309298038482666,
      "learning_rate": 9.398496240601503e-06,
      "loss": 0.4228,
      "step": 860
    },
    {
      "epoch": 4.19,
      "grad_norm": 4.057296276092529,
      "learning_rate": 8.861439312567132e-06,
      "loss": 0.4299,
      "step": 870
    },
    {
      "epoch": 4.24,
      "grad_norm": 3.2583279609680176,
      "learning_rate": 8.324382384532762e-06,
      "loss": 0.4938,
      "step": 880
    },
    {
      "epoch": 4.29,
      "grad_norm": 4.6558451652526855,
      "learning_rate": 7.787325456498389e-06,
      "loss": 0.4269,
      "step": 890
    },
    {
      "epoch": 4.34,
      "grad_norm": 3.8659329414367676,
      "learning_rate": 7.250268528464017e-06,
      "loss": 0.4189,
      "step": 900
    },
    {
      "epoch": 4.39,
      "grad_norm": 2.4085872173309326,
      "learning_rate": 6.713211600429646e-06,
      "loss": 0.4376,
      "step": 910
    },
    {
      "epoch": 4.43,
      "grad_norm": 4.497287750244141,
      "learning_rate": 6.176154672395274e-06,
      "loss": 0.4347,
      "step": 920
    },
    {
      "epoch": 4.48,
      "grad_norm": 5.4745259284973145,
      "learning_rate": 5.639097744360902e-06,
      "loss": 0.3766,
      "step": 930
    },
    {
      "epoch": 4.53,
      "grad_norm": 3.8638925552368164,
      "learning_rate": 5.102040816326531e-06,
      "loss": 0.4002,
      "step": 940
    },
    {
      "epoch": 4.58,
      "grad_norm": 2.668515682220459,
      "learning_rate": 4.564983888292159e-06,
      "loss": 0.4154,
      "step": 950
    },
    {
      "epoch": 4.63,
      "grad_norm": 2.7704601287841797,
      "learning_rate": 4.027926960257788e-06,
      "loss": 0.377,
      "step": 960
    },
    {
      "epoch": 4.67,
      "grad_norm": 4.1865949630737305,
      "learning_rate": 3.490870032223416e-06,
      "loss": 0.375,
      "step": 970
    },
    {
      "epoch": 4.72,
      "grad_norm": 4.669921398162842,
      "learning_rate": 2.9538131041890443e-06,
      "loss": 0.3799,
      "step": 980
    },
    {
      "epoch": 4.77,
      "grad_norm": 4.289856433868408,
      "learning_rate": 2.4167561761546726e-06,
      "loss": 0.3652,
      "step": 990
    },
    {
      "epoch": 4.82,
      "grad_norm": 2.9988179206848145,
      "learning_rate": 1.8796992481203007e-06,
      "loss": 0.389,
      "step": 1000
    },
    {
      "epoch": 4.87,
      "grad_norm": 1.7334405183792114,
      "learning_rate": 1.3426423200859292e-06,
      "loss": 0.3715,
      "step": 1010
    },
    {
      "epoch": 4.92,
      "grad_norm": 4.97418737411499,
      "learning_rate": 8.055853920515575e-07,
      "loss": 0.4106,
      "step": 1020
    },
    {
      "epoch": 4.96,
      "grad_norm": 6.089245319366455,
      "learning_rate": 2.6852846401718585e-07,
      "loss": 0.4526,
      "step": 1030
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.6857386848847139,
      "eval_loss": 0.9283789396286011,
      "eval_runtime": 402.142,
      "eval_samples_per_second": 2.912,
      "eval_steps_per_second": 0.184,
      "step": 1035
    },
    {
      "epoch": 4.99,
      "step": 1035,
      "total_flos": 2.5651227911307264e+18,
      "train_loss": 0.9461189188243111,
      "train_runtime": 36633.1283,
      "train_samples_per_second": 0.906,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 10,
  "max_steps": 1035,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 2.5651227911307264e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}