nutrition-extractor / trainer_state.json
raphael0202's picture
End of training
7b686d4 verified
{
"best_metric": 0.9595749595749595,
"best_model_checkpoint": "ds-v6-large/checkpoint-2805",
"epoch": 33.287101248266296,
"eval_steps": 15,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1664355062413315,
"grad_norm": 2.6665048599243164,
"learning_rate": 9.950000000000001e-06,
"loss": 1.9852,
"step": 15
},
{
"epoch": 0.1664355062413315,
"eval_accuracy": 0.8101443789541989,
"eval_f1": 0.0,
"eval_loss": 1.1499630212783813,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 35.775,
"eval_samples_per_second": 5.563,
"eval_steps_per_second": 1.398,
"step": 15
},
{
"epoch": 0.332871012482663,
"grad_norm": 2.08683443069458,
"learning_rate": 9.9e-06,
"loss": 1.0244,
"step": 30
},
{
"epoch": 0.332871012482663,
"eval_accuracy": 0.8122532850267669,
"eval_f1": 0.012939749292357462,
"eval_loss": 0.834208607673645,
"eval_precision": 0.05,
"eval_recall": 0.0074314909428704135,
"eval_runtime": 35.1999,
"eval_samples_per_second": 5.653,
"eval_steps_per_second": 1.42,
"step": 30
},
{
"epoch": 0.49930651872399445,
"grad_norm": 2.1074297428131104,
"learning_rate": 9.85e-06,
"loss": 0.7826,
"step": 45
},
{
"epoch": 0.49930651872399445,
"eval_accuracy": 0.8479424647163791,
"eval_f1": 0.09320905459387482,
"eval_loss": 0.6794766187667847,
"eval_precision": 0.07893041237113402,
"eval_recall": 0.1137947050627032,
"eval_runtime": 35.557,
"eval_samples_per_second": 5.597,
"eval_steps_per_second": 1.406,
"step": 45
},
{
"epoch": 0.665742024965326,
"grad_norm": 1.8543498516082764,
"learning_rate": 9.800000000000001e-06,
"loss": 0.6767,
"step": 60
},
{
"epoch": 0.665742024965326,
"eval_accuracy": 0.8578381009030444,
"eval_f1": 0.13828125,
"eval_loss": 0.5963338613510132,
"eval_precision": 0.11931243680485339,
"eval_recall": 0.1644217371110079,
"eval_runtime": 35.7897,
"eval_samples_per_second": 5.56,
"eval_steps_per_second": 1.397,
"step": 60
},
{
"epoch": 0.8321775312066574,
"grad_norm": 1.9123793840408325,
"learning_rate": 9.75e-06,
"loss": 0.6031,
"step": 75
},
{
"epoch": 0.8321775312066574,
"eval_accuracy": 0.8690855999567404,
"eval_f1": 0.1916831683168317,
"eval_loss": 0.5405648946762085,
"eval_precision": 0.1670693821194339,
"eval_recall": 0.22480260102183,
"eval_runtime": 35.6034,
"eval_samples_per_second": 5.589,
"eval_steps_per_second": 1.404,
"step": 75
},
{
"epoch": 0.9986130374479889,
"grad_norm": 1.9384328126907349,
"learning_rate": 9.7e-06,
"loss": 0.5756,
"step": 90
},
{
"epoch": 0.9986130374479889,
"eval_accuracy": 0.8777915968204185,
"eval_f1": 0.26393539491825885,
"eval_loss": 0.49346938729286194,
"eval_precision": 0.22913816689466485,
"eval_recall": 0.3111936832326986,
"eval_runtime": 35.6547,
"eval_samples_per_second": 5.581,
"eval_steps_per_second": 1.402,
"step": 90
},
{
"epoch": 1.1650485436893203,
"grad_norm": 1.751382827758789,
"learning_rate": 9.65e-06,
"loss": 0.5215,
"step": 105
},
{
"epoch": 1.1650485436893203,
"eval_accuracy": 0.8904991077705078,
"eval_f1": 0.3575184016824396,
"eval_loss": 0.43015486001968384,
"eval_precision": 0.32667179093005383,
"eval_recall": 0.3947979563399907,
"eval_runtime": 35.5042,
"eval_samples_per_second": 5.605,
"eval_steps_per_second": 1.408,
"step": 105
},
{
"epoch": 1.331484049930652,
"grad_norm": 1.7430224418640137,
"learning_rate": 9.600000000000001e-06,
"loss": 0.4782,
"step": 120
},
{
"epoch": 1.331484049930652,
"eval_accuracy": 0.9020169793976099,
"eval_f1": 0.4266553119012136,
"eval_loss": 0.37819600105285645,
"eval_precision": 0.3938679245283019,
"eval_recall": 0.46539712029725966,
"eval_runtime": 35.9551,
"eval_samples_per_second": 5.535,
"eval_steps_per_second": 1.391,
"step": 120
},
{
"epoch": 1.4979195561719834,
"grad_norm": 2.754100799560547,
"learning_rate": 9.55e-06,
"loss": 0.4208,
"step": 135
},
{
"epoch": 1.4979195561719834,
"eval_accuracy": 0.9080733250419077,
"eval_f1": 0.44783505154639175,
"eval_loss": 0.34046444296836853,
"eval_precision": 0.40266963292547275,
"eval_recall": 0.5044124477473293,
"eval_runtime": 35.1015,
"eval_samples_per_second": 5.669,
"eval_steps_per_second": 1.424,
"step": 135
},
{
"epoch": 1.664355062413315,
"grad_norm": 1.3271350860595703,
"learning_rate": 9.5e-06,
"loss": 0.3532,
"step": 150
},
{
"epoch": 1.664355062413315,
"eval_accuracy": 0.9251608716811767,
"eval_f1": 0.5355845266082496,
"eval_loss": 0.2930045425891876,
"eval_precision": 0.49604117181314333,
"eval_recall": 0.5819786344635393,
"eval_runtime": 34.7775,
"eval_samples_per_second": 5.722,
"eval_steps_per_second": 1.438,
"step": 150
},
{
"epoch": 1.8307905686546464,
"grad_norm": 1.9117140769958496,
"learning_rate": 9.450000000000001e-06,
"loss": 0.3458,
"step": 165
},
{
"epoch": 1.8307905686546464,
"eval_accuracy": 0.9301357270318499,
"eval_f1": 0.5559597688850845,
"eval_loss": 0.2658008933067322,
"eval_precision": 0.5154761904761904,
"eval_recall": 0.6033441709242917,
"eval_runtime": 34.9787,
"eval_samples_per_second": 5.689,
"eval_steps_per_second": 1.429,
"step": 165
},
{
"epoch": 1.9972260748959778,
"grad_norm": 1.9700042009353638,
"learning_rate": 9.4e-06,
"loss": 0.302,
"step": 180
},
{
"epoch": 1.9972260748959778,
"eval_accuracy": 0.9474395717298437,
"eval_f1": 0.6529640848117698,
"eval_loss": 0.2320590764284134,
"eval_precision": 0.6111786148238153,
"eval_recall": 0.7008824895494659,
"eval_runtime": 35.1938,
"eval_samples_per_second": 5.654,
"eval_steps_per_second": 1.421,
"step": 180
},
{
"epoch": 2.163661581137309,
"grad_norm": 1.2119841575622559,
"learning_rate": 9.350000000000002e-06,
"loss": 0.2655,
"step": 195
},
{
"epoch": 2.163661581137309,
"eval_accuracy": 0.9519818309630671,
"eval_f1": 0.6844638949671772,
"eval_loss": 0.20933493971824646,
"eval_precision": 0.6470831609433182,
"eval_recall": 0.7264282396655829,
"eval_runtime": 35.2683,
"eval_samples_per_second": 5.642,
"eval_steps_per_second": 1.418,
"step": 195
},
{
"epoch": 2.3300970873786406,
"grad_norm": 2.050490617752075,
"learning_rate": 9.3e-06,
"loss": 0.2598,
"step": 210
},
{
"epoch": 2.3300970873786406,
"eval_accuracy": 0.9570107608284215,
"eval_f1": 0.7274759669125868,
"eval_loss": 0.1951305866241455,
"eval_precision": 0.7012931034482759,
"eval_recall": 0.7556897352531352,
"eval_runtime": 35.2296,
"eval_samples_per_second": 5.649,
"eval_steps_per_second": 1.419,
"step": 210
},
{
"epoch": 2.496532593619972,
"grad_norm": 2.1060705184936523,
"learning_rate": 9.250000000000001e-06,
"loss": 0.2364,
"step": 225
},
{
"epoch": 2.496532593619972,
"eval_accuracy": 0.9590115178716271,
"eval_f1": 0.7402309058614565,
"eval_loss": 0.17936836183071136,
"eval_precision": 0.7090599744789451,
"eval_recall": 0.7742684626103112,
"eval_runtime": 35.4911,
"eval_samples_per_second": 5.607,
"eval_steps_per_second": 1.409,
"step": 225
},
{
"epoch": 2.662968099861304,
"grad_norm": 1.8435375690460205,
"learning_rate": 9.200000000000002e-06,
"loss": 0.2218,
"step": 240
},
{
"epoch": 2.662968099861304,
"eval_accuracy": 0.9621478397231384,
"eval_f1": 0.7557522123893805,
"eval_loss": 0.1675911545753479,
"eval_precision": 0.721588508660752,
"eval_recall": 0.7933116581514166,
"eval_runtime": 35.3833,
"eval_samples_per_second": 5.624,
"eval_steps_per_second": 1.413,
"step": 240
},
{
"epoch": 2.8294036061026353,
"grad_norm": 2.065732479095459,
"learning_rate": 9.15e-06,
"loss": 0.206,
"step": 255
},
{
"epoch": 2.8294036061026353,
"eval_accuracy": 0.9650137890012437,
"eval_f1": 0.7758275938680294,
"eval_loss": 0.15723256766796112,
"eval_precision": 0.7436115843270868,
"eval_recall": 0.8109614491407339,
"eval_runtime": 35.4693,
"eval_samples_per_second": 5.61,
"eval_steps_per_second": 1.41,
"step": 255
},
{
"epoch": 2.9958391123439667,
"grad_norm": 2.1758480072021484,
"learning_rate": 9.100000000000001e-06,
"loss": 0.2053,
"step": 270
},
{
"epoch": 2.9958391123439667,
"eval_accuracy": 0.9640404477369816,
"eval_f1": 0.7730088495575222,
"eval_loss": 0.15795043110847473,
"eval_precision": 0.7380650612589776,
"eval_recall": 0.8114259173246633,
"eval_runtime": 35.4002,
"eval_samples_per_second": 5.621,
"eval_steps_per_second": 1.412,
"step": 270
},
{
"epoch": 3.162274618585298,
"grad_norm": 1.6404600143432617,
"learning_rate": 9.050000000000001e-06,
"loss": 0.1876,
"step": 285
},
{
"epoch": 3.162274618585298,
"eval_accuracy": 0.9687449305142486,
"eval_f1": 0.801343784994401,
"eval_loss": 0.1406078040599823,
"eval_precision": 0.7737889273356401,
"eval_recall": 0.8309335810496981,
"eval_runtime": 35.6674,
"eval_samples_per_second": 5.579,
"eval_steps_per_second": 1.402,
"step": 285
},
{
"epoch": 3.3287101248266295,
"grad_norm": 1.8430469036102295,
"learning_rate": 9e-06,
"loss": 0.1602,
"step": 300
},
{
"epoch": 3.3287101248266295,
"eval_accuracy": 0.9670686205591305,
"eval_f1": 0.7985659870042572,
"eval_loss": 0.14204147458076477,
"eval_precision": 0.7714285714285715,
"eval_recall": 0.8276823037621923,
"eval_runtime": 35.5741,
"eval_samples_per_second": 5.594,
"eval_steps_per_second": 1.406,
"step": 300
},
{
"epoch": 3.4951456310679614,
"grad_norm": 2.2237956523895264,
"learning_rate": 8.95e-06,
"loss": 0.1706,
"step": 315
},
{
"epoch": 3.4951456310679614,
"eval_accuracy": 0.969069377602336,
"eval_f1": 0.8149988705669754,
"eval_loss": 0.13229934871196747,
"eval_precision": 0.793315743183817,
"eval_recall": 0.8379006038086391,
"eval_runtime": 36.0972,
"eval_samples_per_second": 5.513,
"eval_steps_per_second": 1.385,
"step": 315
},
{
"epoch": 3.661581137309293,
"grad_norm": 2.04622220993042,
"learning_rate": 8.900000000000001e-06,
"loss": 0.1585,
"step": 330
},
{
"epoch": 3.661581137309293,
"eval_accuracy": 0.9700427188665982,
"eval_f1": 0.8298399819697994,
"eval_loss": 0.13131560385227203,
"eval_precision": 0.8060420315236427,
"eval_recall": 0.8550859266140269,
"eval_runtime": 35.6467,
"eval_samples_per_second": 5.583,
"eval_steps_per_second": 1.403,
"step": 330
},
{
"epoch": 3.828016643550624,
"grad_norm": 2.0790255069732666,
"learning_rate": 8.85e-06,
"loss": 0.1574,
"step": 345
},
{
"epoch": 3.828016643550624,
"eval_accuracy": 0.9717190288217163,
"eval_f1": 0.8376491781130375,
"eval_loss": 0.12674090266227722,
"eval_precision": 0.8129370629370629,
"eval_recall": 0.8639108221086855,
"eval_runtime": 35.5835,
"eval_samples_per_second": 5.592,
"eval_steps_per_second": 1.405,
"step": 345
},
{
"epoch": 3.9944521497919556,
"grad_norm": 2.3372180461883545,
"learning_rate": 8.8e-06,
"loss": 0.15,
"step": 360
},
{
"epoch": 3.9944521497919556,
"eval_accuracy": 0.97539609582004,
"eval_f1": 0.8535811423390752,
"eval_loss": 0.11569273471832275,
"eval_precision": 0.8335546702080566,
"eval_recall": 0.8745935903390618,
"eval_runtime": 35.7391,
"eval_samples_per_second": 5.568,
"eval_steps_per_second": 1.399,
"step": 360
},
{
"epoch": 4.160887656033287,
"grad_norm": 2.763075828552246,
"learning_rate": 8.750000000000001e-06,
"loss": 0.1192,
"step": 375
},
{
"epoch": 4.160887656033287,
"eval_accuracy": 0.9740983074676904,
"eval_f1": 0.8524664696521937,
"eval_loss": 0.11200679838657379,
"eval_precision": 0.8348174532502226,
"eval_recall": 0.8708778448676265,
"eval_runtime": 36.0008,
"eval_samples_per_second": 5.528,
"eval_steps_per_second": 1.389,
"step": 375
},
{
"epoch": 4.327323162274618,
"grad_norm": 1.7937551736831665,
"learning_rate": 8.700000000000001e-06,
"loss": 0.1313,
"step": 390
},
{
"epoch": 4.327323162274618,
"eval_accuracy": 0.9745309035851403,
"eval_f1": 0.8588929219600727,
"eval_loss": 0.1129654049873352,
"eval_precision": 0.8394678492239468,
"eval_recall": 0.8792382721783558,
"eval_runtime": 36.0013,
"eval_samples_per_second": 5.528,
"eval_steps_per_second": 1.389,
"step": 390
},
{
"epoch": 4.49375866851595,
"grad_norm": 1.4142848253250122,
"learning_rate": 8.65e-06,
"loss": 0.1179,
"step": 405
},
{
"epoch": 4.49375866851595,
"eval_accuracy": 0.9755042448494025,
"eval_f1": 0.8613303269447576,
"eval_loss": 0.109279565513134,
"eval_precision": 0.8369851007887817,
"eval_recall": 0.8871342313051556,
"eval_runtime": 36.0609,
"eval_samples_per_second": 5.518,
"eval_steps_per_second": 1.387,
"step": 405
},
{
"epoch": 4.660194174757281,
"grad_norm": 1.6794809103012085,
"learning_rate": 8.6e-06,
"loss": 0.1327,
"step": 420
},
{
"epoch": 4.660194174757281,
"eval_accuracy": 0.9745849780998216,
"eval_f1": 0.862053369516056,
"eval_loss": 0.11022669076919556,
"eval_precision": 0.8400176289114147,
"eval_recall": 0.885276358569438,
"eval_runtime": 36.0549,
"eval_samples_per_second": 5.519,
"eval_steps_per_second": 1.387,
"step": 420
},
{
"epoch": 4.826629680998613,
"grad_norm": 1.8358403444290161,
"learning_rate": 8.550000000000001e-06,
"loss": 0.1323,
"step": 435
},
{
"epoch": 4.826629680998613,
"eval_accuracy": 0.978207970583464,
"eval_f1": 0.8795454545454546,
"eval_loss": 0.09974753856658936,
"eval_precision": 0.8611481975967957,
"eval_recall": 0.8987459359033906,
"eval_runtime": 36.3053,
"eval_samples_per_second": 5.481,
"eval_steps_per_second": 1.377,
"step": 435
},
{
"epoch": 4.993065187239944,
"grad_norm": 2.1321513652801514,
"learning_rate": 8.5e-06,
"loss": 0.1254,
"step": 450
},
{
"epoch": 4.993065187239944,
"eval_accuracy": 0.9774509273779268,
"eval_f1": 0.8727683615819208,
"eval_loss": 0.094924695789814,
"eval_precision": 0.8499119718309859,
"eval_recall": 0.896888063167673,
"eval_runtime": 36.229,
"eval_samples_per_second": 5.493,
"eval_steps_per_second": 1.38,
"step": 450
},
{
"epoch": 5.159500693481276,
"grad_norm": 1.3562971353530884,
"learning_rate": 8.45e-06,
"loss": 0.0999,
"step": 465
},
{
"epoch": 5.159500693481276,
"eval_accuracy": 0.9797220569945385,
"eval_f1": 0.8822055137844612,
"eval_loss": 0.08469922095537186,
"eval_precision": 0.8658318425760286,
"eval_recall": 0.89921040408732,
"eval_runtime": 36.2946,
"eval_samples_per_second": 5.483,
"eval_steps_per_second": 1.378,
"step": 465
},
{
"epoch": 5.325936199722608,
"grad_norm": 1.683296799659729,
"learning_rate": 8.400000000000001e-06,
"loss": 0.1017,
"step": 480
},
{
"epoch": 5.325936199722608,
"eval_accuracy": 0.981019845346888,
"eval_f1": 0.8923777019340159,
"eval_loss": 0.08026640117168427,
"eval_precision": 0.8746654772524531,
"eval_recall": 0.910822108685555,
"eval_runtime": 36.4129,
"eval_samples_per_second": 5.465,
"eval_steps_per_second": 1.373,
"step": 480
},
{
"epoch": 5.492371705963939,
"grad_norm": 2.303062677383423,
"learning_rate": 8.35e-06,
"loss": 0.091,
"step": 495
},
{
"epoch": 5.492371705963939,
"eval_accuracy": 0.9805872492294382,
"eval_f1": 0.8918362680082322,
"eval_loss": 0.07959215342998505,
"eval_precision": 0.8783783783783784,
"eval_recall": 0.9057129586623316,
"eval_runtime": 36.4104,
"eval_samples_per_second": 5.465,
"eval_steps_per_second": 1.373,
"step": 495
},
{
"epoch": 5.658807212205271,
"grad_norm": 3.383983850479126,
"learning_rate": 8.3e-06,
"loss": 0.0979,
"step": 510
},
{
"epoch": 5.658807212205271,
"eval_accuracy": 0.9773427783485643,
"eval_f1": 0.8775045537340619,
"eval_loss": 0.09432032704353333,
"eval_precision": 0.8606520768200089,
"eval_recall": 0.8950301904319554,
"eval_runtime": 36.0505,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 1.387,
"step": 510
},
{
"epoch": 5.825242718446602,
"grad_norm": 2.1892480850219727,
"learning_rate": 8.25e-06,
"loss": 0.1024,
"step": 525
},
{
"epoch": 5.825242718446602,
"eval_accuracy": 0.980533174714757,
"eval_f1": 0.8882312770316413,
"eval_loss": 0.08036847412586212,
"eval_precision": 0.8709821428571428,
"eval_recall": 0.906177426846261,
"eval_runtime": 36.1406,
"eval_samples_per_second": 5.506,
"eval_steps_per_second": 1.383,
"step": 525
},
{
"epoch": 5.991678224687933,
"grad_norm": 1.8490287065505981,
"learning_rate": 8.2e-06,
"loss": 0.0952,
"step": 540
},
{
"epoch": 5.991678224687933,
"eval_accuracy": 0.9816146650083816,
"eval_f1": 0.900843400957374,
"eval_loss": 0.07866356521844864,
"eval_precision": 0.8845120859444942,
"eval_recall": 0.917789131444496,
"eval_runtime": 36.4974,
"eval_samples_per_second": 5.452,
"eval_steps_per_second": 1.37,
"step": 540
},
{
"epoch": 6.158113730929265,
"grad_norm": 3.0108256340026855,
"learning_rate": 8.15e-06,
"loss": 0.0742,
"step": 555
},
{
"epoch": 6.158113730929265,
"eval_accuracy": 0.9823176336992375,
"eval_f1": 0.9032553874369554,
"eval_loss": 0.07755902409553528,
"eval_precision": 0.8918062471706655,
"eval_recall": 0.9150023223409196,
"eval_runtime": 36.3184,
"eval_samples_per_second": 5.479,
"eval_steps_per_second": 1.377,
"step": 555
},
{
"epoch": 6.324549237170596,
"grad_norm": 2.533155679702759,
"learning_rate": 8.1e-06,
"loss": 0.0764,
"step": 570
},
{
"epoch": 6.324549237170596,
"eval_accuracy": 0.9837235710809495,
"eval_f1": 0.9106813996316758,
"eval_loss": 0.07210895419120789,
"eval_precision": 0.9027841168416249,
"eval_recall": 0.9187180678123549,
"eval_runtime": 36.5311,
"eval_samples_per_second": 5.447,
"eval_steps_per_second": 1.369,
"step": 570
},
{
"epoch": 6.490984743411928,
"grad_norm": 1.943320631980896,
"learning_rate": 8.050000000000001e-06,
"loss": 0.0813,
"step": 585
},
{
"epoch": 6.490984743411928,
"eval_accuracy": 0.9844265397718055,
"eval_f1": 0.914614499424626,
"eval_loss": 0.06643209606409073,
"eval_precision": 0.906478102189781,
"eval_recall": 0.9228982814677195,
"eval_runtime": 36.3904,
"eval_samples_per_second": 5.468,
"eval_steps_per_second": 1.374,
"step": 585
},
{
"epoch": 6.657420249653259,
"grad_norm": 1.322831392288208,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0791,
"step": 600
},
{
"epoch": 6.657420249653259,
"eval_accuracy": 0.9848050613745741,
"eval_f1": 0.9137614678899082,
"eval_loss": 0.06415116786956787,
"eval_precision": 0.902582691436339,
"eval_recall": 0.9252206223873665,
"eval_runtime": 36.27,
"eval_samples_per_second": 5.487,
"eval_steps_per_second": 1.379,
"step": 600
},
{
"epoch": 6.8238557558945905,
"grad_norm": 1.5891202688217163,
"learning_rate": 7.950000000000002e-06,
"loss": 0.0792,
"step": 615
},
{
"epoch": 6.8238557558945905,
"eval_accuracy": 0.9841020926837182,
"eval_f1": 0.9103795153177869,
"eval_loss": 0.06728328764438629,
"eval_precision": 0.8964430436740207,
"eval_recall": 0.924756154203437,
"eval_runtime": 35.8847,
"eval_samples_per_second": 5.546,
"eval_steps_per_second": 1.393,
"step": 615
},
{
"epoch": 6.990291262135923,
"grad_norm": 3.470646858215332,
"learning_rate": 7.9e-06,
"loss": 0.078,
"step": 630
},
{
"epoch": 6.990291262135923,
"eval_accuracy": 0.9832909749634997,
"eval_f1": 0.9078857142857143,
"eval_loss": 0.06933122873306274,
"eval_precision": 0.8937893789378938,
"eval_recall": 0.92243381328379,
"eval_runtime": 36.1839,
"eval_samples_per_second": 5.5,
"eval_steps_per_second": 1.382,
"step": 630
},
{
"epoch": 7.156726768377254,
"grad_norm": 2.4168286323547363,
"learning_rate": 7.850000000000001e-06,
"loss": 0.0678,
"step": 645
},
{
"epoch": 7.156726768377254,
"eval_accuracy": 0.985237657492024,
"eval_f1": 0.92025664527956,
"eval_loss": 0.06722652167081833,
"eval_precision": 0.9081863410221619,
"eval_recall": 0.9326521133302369,
"eval_runtime": 36.716,
"eval_samples_per_second": 5.42,
"eval_steps_per_second": 1.362,
"step": 645
},
{
"epoch": 7.323162274618586,
"grad_norm": 1.048614501953125,
"learning_rate": 7.800000000000002e-06,
"loss": 0.0685,
"step": 660
},
{
"epoch": 7.323162274618586,
"eval_accuracy": 0.9839939436543557,
"eval_f1": 0.9072635906806761,
"eval_loss": 0.06548429280519485,
"eval_precision": 0.8925842696629214,
"eval_recall": 0.92243381328379,
"eval_runtime": 36.897,
"eval_samples_per_second": 5.393,
"eval_steps_per_second": 1.355,
"step": 660
},
{
"epoch": 7.489597780859917,
"grad_norm": 2.5844979286193848,
"learning_rate": 7.75e-06,
"loss": 0.0555,
"step": 675
},
{
"epoch": 7.489597780859917,
"eval_accuracy": 0.9856161790947926,
"eval_f1": 0.9213016385875836,
"eval_loss": 0.06148982420563698,
"eval_precision": 0.9155963302752294,
"eval_recall": 0.927078495123084,
"eval_runtime": 36.1847,
"eval_samples_per_second": 5.5,
"eval_steps_per_second": 1.382,
"step": 675
},
{
"epoch": 7.656033287101248,
"grad_norm": 1.9488413333892822,
"learning_rate": 7.7e-06,
"loss": 0.07,
"step": 690
},
{
"epoch": 7.656033287101248,
"eval_accuracy": 0.9867517439030985,
"eval_f1": 0.927176659774868,
"eval_loss": 0.058708589524030685,
"eval_precision": 0.9172727272727272,
"eval_recall": 0.9372967951695309,
"eval_runtime": 36.4405,
"eval_samples_per_second": 5.461,
"eval_steps_per_second": 1.372,
"step": 690
},
{
"epoch": 7.82246879334258,
"grad_norm": 1.7437242269515991,
"learning_rate": 7.650000000000001e-06,
"loss": 0.065,
"step": 705
},
{
"epoch": 7.82246879334258,
"eval_accuracy": 0.9874547125939545,
"eval_f1": 0.9303928325292902,
"eval_loss": 0.0557989701628685,
"eval_precision": 0.9204545454545454,
"eval_recall": 0.9405480724570366,
"eval_runtime": 36.0661,
"eval_samples_per_second": 5.518,
"eval_steps_per_second": 1.386,
"step": 705
},
{
"epoch": 7.988904299583911,
"grad_norm": 1.0527422428131104,
"learning_rate": 7.600000000000001e-06,
"loss": 0.0599,
"step": 720
},
{
"epoch": 7.988904299583911,
"eval_accuracy": 0.9878332341967231,
"eval_f1": 0.9342226310947562,
"eval_loss": 0.05789622664451599,
"eval_precision": 0.9252847380410023,
"eval_recall": 0.9433348815606131,
"eval_runtime": 36.3712,
"eval_samples_per_second": 5.471,
"eval_steps_per_second": 1.375,
"step": 720
},
{
"epoch": 8.155339805825243,
"grad_norm": 1.6904972791671753,
"learning_rate": 7.5500000000000006e-06,
"loss": 0.0571,
"step": 735
},
{
"epoch": 8.155339805825243,
"eval_accuracy": 0.9865895203590548,
"eval_f1": 0.9238905495516211,
"eval_loss": 0.059290919452905655,
"eval_precision": 0.9148451730418944,
"eval_recall": 0.9331165815141663,
"eval_runtime": 36.0084,
"eval_samples_per_second": 5.526,
"eval_steps_per_second": 1.389,
"step": 735
},
{
"epoch": 8.321775312066574,
"grad_norm": 1.9831328392028809,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0563,
"step": 750
},
{
"epoch": 8.321775312066574,
"eval_accuracy": 0.9863191477856487,
"eval_f1": 0.9236079153244362,
"eval_loss": 0.06046581640839577,
"eval_precision": 0.9151846785225718,
"eval_recall": 0.9321876451463075,
"eval_runtime": 36.0693,
"eval_samples_per_second": 5.517,
"eval_steps_per_second": 1.386,
"step": 750
},
{
"epoch": 8.488210818307905,
"grad_norm": 2.0379467010498047,
"learning_rate": 7.450000000000001e-06,
"loss": 0.0602,
"step": 765
},
{
"epoch": 8.488210818307905,
"eval_accuracy": 0.9863191477856487,
"eval_f1": 0.927992590877518,
"eval_loss": 0.058113399893045425,
"eval_precision": 0.925207756232687,
"eval_recall": 0.9307942405945193,
"eval_runtime": 35.9178,
"eval_samples_per_second": 5.54,
"eval_steps_per_second": 1.392,
"step": 765
},
{
"epoch": 8.654646324549237,
"grad_norm": 3.095200538635254,
"learning_rate": 7.4e-06,
"loss": 0.0582,
"step": 780
},
{
"epoch": 8.654646324549237,
"eval_accuracy": 0.9872384145352295,
"eval_f1": 0.9288837744533948,
"eval_loss": 0.05814095214009285,
"eval_precision": 0.9206204379562044,
"eval_recall": 0.9372967951695309,
"eval_runtime": 36.2273,
"eval_samples_per_second": 5.493,
"eval_steps_per_second": 1.38,
"step": 780
},
{
"epoch": 8.821081830790568,
"grad_norm": 1.0786473751068115,
"learning_rate": 7.350000000000001e-06,
"loss": 0.0514,
"step": 795
},
{
"epoch": 8.821081830790568,
"eval_accuracy": 0.9872924890499107,
"eval_f1": 0.9313047487321346,
"eval_loss": 0.055727362632751465,
"eval_precision": 0.9244851258581236,
"eval_recall": 0.9382257315373896,
"eval_runtime": 36.0241,
"eval_samples_per_second": 5.524,
"eval_steps_per_second": 1.388,
"step": 795
},
{
"epoch": 8.9875173370319,
"grad_norm": 1.6077920198440552,
"learning_rate": 7.3e-06,
"loss": 0.0467,
"step": 810
},
{
"epoch": 8.9875173370319,
"eval_accuracy": 0.9883199048288541,
"eval_f1": 0.9393661001378043,
"eval_loss": 0.05200658738613129,
"eval_precision": 0.9291231258518855,
"eval_recall": 0.9498374361356247,
"eval_runtime": 35.9411,
"eval_samples_per_second": 5.537,
"eval_steps_per_second": 1.391,
"step": 810
},
{
"epoch": 9.153952843273231,
"grad_norm": 1.601219892501831,
"learning_rate": 7.25e-06,
"loss": 0.0435,
"step": 825
},
{
"epoch": 9.153952843273231,
"eval_accuracy": 0.9879954577407668,
"eval_f1": 0.9336699563920129,
"eval_loss": 0.05260741710662842,
"eval_precision": 0.9228675136116152,
"eval_recall": 0.9447282861124013,
"eval_runtime": 35.7996,
"eval_samples_per_second": 5.559,
"eval_steps_per_second": 1.397,
"step": 825
},
{
"epoch": 9.320388349514563,
"grad_norm": 0.7272451519966125,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.0531,
"step": 840
},
{
"epoch": 9.320388349514563,
"eval_accuracy": 0.9883739793435354,
"eval_f1": 0.9344978165938865,
"eval_loss": 0.05022520199418068,
"eval_precision": 0.9249317561419472,
"eval_recall": 0.9442638179284719,
"eval_runtime": 36.0285,
"eval_samples_per_second": 5.523,
"eval_steps_per_second": 1.388,
"step": 840
},
{
"epoch": 9.486823855755894,
"grad_norm": 0.9556881189346313,
"learning_rate": 7.15e-06,
"loss": 0.0502,
"step": 855
},
{
"epoch": 9.486823855755894,
"eval_accuracy": 0.9874006380792733,
"eval_f1": 0.9309240622140896,
"eval_loss": 0.05446859449148178,
"eval_precision": 0.9170797656602073,
"eval_recall": 0.9451927542963307,
"eval_runtime": 36.0609,
"eval_samples_per_second": 5.518,
"eval_steps_per_second": 1.387,
"step": 855
},
{
"epoch": 9.653259361997225,
"grad_norm": 1.0404924154281616,
"learning_rate": 7.100000000000001e-06,
"loss": 0.0377,
"step": 870
},
{
"epoch": 9.653259361997225,
"eval_accuracy": 0.9850754339479804,
"eval_f1": 0.9220571428571429,
"eval_loss": 0.06175297126173973,
"eval_precision": 0.9077407740774077,
"eval_recall": 0.9368323269856015,
"eval_runtime": 36.326,
"eval_samples_per_second": 5.478,
"eval_steps_per_second": 1.376,
"step": 870
},
{
"epoch": 9.819694868238557,
"grad_norm": 1.1249316930770874,
"learning_rate": 7.05e-06,
"loss": 0.0416,
"step": 885
},
{
"epoch": 9.819694868238557,
"eval_accuracy": 0.9881036067701292,
"eval_f1": 0.9328719723183392,
"eval_loss": 0.05493583530187607,
"eval_precision": 0.9266727772685609,
"eval_recall": 0.9391546679052485,
"eval_runtime": 36.1852,
"eval_samples_per_second": 5.499,
"eval_steps_per_second": 1.382,
"step": 885
},
{
"epoch": 9.986130374479888,
"grad_norm": 1.0846829414367676,
"learning_rate": 7e-06,
"loss": 0.044,
"step": 900
},
{
"epoch": 9.986130374479888,
"eval_accuracy": 0.9884280538582166,
"eval_f1": 0.9420457169244978,
"eval_loss": 0.05289188027381897,
"eval_precision": 0.9366391184573003,
"eval_recall": 0.9475150952159777,
"eval_runtime": 36.0505,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 1.387,
"step": 900
},
{
"epoch": 10.152565880721221,
"grad_norm": 0.8957504630088806,
"learning_rate": 6.95e-06,
"loss": 0.0383,
"step": 915
},
{
"epoch": 10.152565880721221,
"eval_accuracy": 0.9889147244903477,
"eval_f1": 0.9403088269186448,
"eval_loss": 0.048978183418512344,
"eval_precision": 0.9332113449222323,
"eval_recall": 0.9475150952159777,
"eval_runtime": 36.1551,
"eval_samples_per_second": 5.504,
"eval_steps_per_second": 1.383,
"step": 915
},
{
"epoch": 10.319001386962553,
"grad_norm": 1.6940028667449951,
"learning_rate": 6.9e-06,
"loss": 0.0454,
"step": 930
},
{
"epoch": 10.319001386962553,
"eval_accuracy": 0.988536202887579,
"eval_f1": 0.9366100137804317,
"eval_loss": 0.05073446407914162,
"eval_precision": 0.9263970922308041,
"eval_recall": 0.9470506270320483,
"eval_runtime": 36.1642,
"eval_samples_per_second": 5.503,
"eval_steps_per_second": 1.383,
"step": 930
},
{
"epoch": 10.485436893203884,
"grad_norm": 0.9225968718528748,
"learning_rate": 6.850000000000001e-06,
"loss": 0.0416,
"step": 945
},
{
"epoch": 10.485436893203884,
"eval_accuracy": 0.9891310225490726,
"eval_f1": 0.9430481899930827,
"eval_loss": 0.046711865812540054,
"eval_precision": 0.9363553113553114,
"eval_recall": 0.9498374361356247,
"eval_runtime": 36.5741,
"eval_samples_per_second": 5.441,
"eval_steps_per_second": 1.367,
"step": 945
},
{
"epoch": 10.651872399445216,
"grad_norm": 2.7210068702697754,
"learning_rate": 6.800000000000001e-06,
"loss": 0.0403,
"step": 960
},
{
"epoch": 10.651872399445216,
"eval_accuracy": 0.9886443519169416,
"eval_f1": 0.9384650841207652,
"eval_loss": 0.04987097531557083,
"eval_precision": 0.9313815187557182,
"eval_recall": 0.9456572224802601,
"eval_runtime": 36.5567,
"eval_samples_per_second": 5.444,
"eval_steps_per_second": 1.368,
"step": 960
},
{
"epoch": 10.818307905686547,
"grad_norm": 1.160333275794983,
"learning_rate": 6.750000000000001e-06,
"loss": 0.0354,
"step": 975
},
{
"epoch": 10.818307905686547,
"eval_accuracy": 0.9882658303141729,
"eval_f1": 0.9354171454837968,
"eval_loss": 0.05233873799443245,
"eval_precision": 0.9258416742493175,
"eval_recall": 0.9451927542963307,
"eval_runtime": 36.44,
"eval_samples_per_second": 5.461,
"eval_steps_per_second": 1.372,
"step": 975
},
{
"epoch": 10.984743411927878,
"grad_norm": 0.8807191848754883,
"learning_rate": 6.700000000000001e-06,
"loss": 0.0338,
"step": 990
},
{
"epoch": 10.984743411927878,
"eval_accuracy": 0.9879954577407668,
"eval_f1": 0.9318025258323767,
"eval_loss": 0.052071038633584976,
"eval_precision": 0.9214350590372389,
"eval_recall": 0.9424059451927543,
"eval_runtime": 36.6322,
"eval_samples_per_second": 5.432,
"eval_steps_per_second": 1.365,
"step": 990
},
{
"epoch": 11.15117891816921,
"grad_norm": 1.1557176113128662,
"learning_rate": 6.650000000000001e-06,
"loss": 0.0347,
"step": 1005
},
{
"epoch": 11.15117891816921,
"eval_accuracy": 0.988049532255448,
"eval_f1": 0.9353507565337001,
"eval_loss": 0.053912434726953506,
"eval_precision": 0.9234947940244455,
"eval_recall": 0.9475150952159777,
"eval_runtime": 36.5986,
"eval_samples_per_second": 5.437,
"eval_steps_per_second": 1.366,
"step": 1005
},
{
"epoch": 11.317614424410541,
"grad_norm": 1.668484091758728,
"learning_rate": 6.600000000000001e-06,
"loss": 0.0364,
"step": 1020
},
{
"epoch": 11.317614424410541,
"eval_accuracy": 0.9870761909911858,
"eval_f1": 0.9334552938486165,
"eval_loss": 0.055973075330257416,
"eval_precision": 0.9193693693693694,
"eval_recall": 0.9479795633999071,
"eval_runtime": 36.6625,
"eval_samples_per_second": 5.428,
"eval_steps_per_second": 1.364,
"step": 1020
},
{
"epoch": 11.484049930651873,
"grad_norm": 2.5720293521881104,
"learning_rate": 6.550000000000001e-06,
"loss": 0.0363,
"step": 1035
},
{
"epoch": 11.484049930651873,
"eval_accuracy": 0.9889147244903477,
"eval_f1": 0.9381751321535279,
"eval_loss": 0.050925422459840775,
"eval_precision": 0.9285714285714286,
"eval_recall": 0.9479795633999071,
"eval_runtime": 36.4069,
"eval_samples_per_second": 5.466,
"eval_steps_per_second": 1.373,
"step": 1035
},
{
"epoch": 11.650485436893204,
"grad_norm": 2.5676207542419434,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0308,
"step": 1050
},
{
"epoch": 11.650485436893204,
"eval_accuracy": 0.9893473206077975,
"eval_f1": 0.94362292051756,
"eval_loss": 0.04982053115963936,
"eval_precision": 0.9388505747126437,
"eval_recall": 0.9484440315838365,
"eval_runtime": 36.3679,
"eval_samples_per_second": 5.472,
"eval_steps_per_second": 1.375,
"step": 1050
},
{
"epoch": 11.816920943134535,
"grad_norm": 0.9586185812950134,
"learning_rate": 6.450000000000001e-06,
"loss": 0.032,
"step": 1065
},
{
"epoch": 11.816920943134535,
"eval_accuracy": 0.9891310225490726,
"eval_f1": 0.9403330249768733,
"eval_loss": 0.04908496141433716,
"eval_precision": 0.9364348226623675,
"eval_recall": 0.9442638179284719,
"eval_runtime": 35.9979,
"eval_samples_per_second": 5.528,
"eval_steps_per_second": 1.389,
"step": 1065
},
{
"epoch": 11.983356449375867,
"grad_norm": 1.067063331604004,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0331,
"step": 1080
},
{
"epoch": 11.983356449375867,
"eval_accuracy": 0.9891850970637539,
"eval_f1": 0.940768162887552,
"eval_loss": 0.0454898327589035,
"eval_precision": 0.9372982941447672,
"eval_recall": 0.9442638179284719,
"eval_runtime": 36.1674,
"eval_samples_per_second": 5.502,
"eval_steps_per_second": 1.382,
"step": 1080
},
{
"epoch": 12.149791955617198,
"grad_norm": 1.4905815124511719,
"learning_rate": 6.35e-06,
"loss": 0.0301,
"step": 1095
},
{
"epoch": 12.149791955617198,
"eval_accuracy": 0.9891850970637539,
"eval_f1": 0.9423431734317342,
"eval_loss": 0.04859260097146034,
"eval_precision": 0.9358680714612918,
"eval_recall": 0.9489084997677659,
"eval_runtime": 36.286,
"eval_samples_per_second": 5.484,
"eval_steps_per_second": 1.378,
"step": 1095
},
{
"epoch": 12.31622746185853,
"grad_norm": 1.3888496160507202,
"learning_rate": 6.300000000000001e-06,
"loss": 0.0308,
"step": 1110
},
{
"epoch": 12.31622746185853,
"eval_accuracy": 0.9891310225490726,
"eval_f1": 0.9413388543823326,
"eval_loss": 0.051349248737096786,
"eval_precision": 0.9325432999088423,
"eval_recall": 0.9503019043195541,
"eval_runtime": 36.2143,
"eval_samples_per_second": 5.495,
"eval_steps_per_second": 1.381,
"step": 1110
},
{
"epoch": 12.482662968099861,
"grad_norm": 0.5457278490066528,
"learning_rate": 6.25e-06,
"loss": 0.0253,
"step": 1125
},
{
"epoch": 12.482662968099861,
"eval_accuracy": 0.9891850970637539,
"eval_f1": 0.939825447864033,
"eval_loss": 0.05103699862957001,
"eval_precision": 0.9295774647887324,
"eval_recall": 0.9503019043195541,
"eval_runtime": 36.4491,
"eval_samples_per_second": 5.46,
"eval_steps_per_second": 1.372,
"step": 1125
},
{
"epoch": 12.649098474341192,
"grad_norm": 1.106314778327942,
"learning_rate": 6.200000000000001e-06,
"loss": 0.0301,
"step": 1140
},
{
"epoch": 12.649098474341192,
"eval_accuracy": 0.9886443519169416,
"eval_f1": 0.9397424103035878,
"eval_loss": 0.053277622908353806,
"eval_precision": 0.9307517084282461,
"eval_recall": 0.9489084997677659,
"eval_runtime": 36.4299,
"eval_samples_per_second": 5.463,
"eval_steps_per_second": 1.372,
"step": 1140
},
{
"epoch": 12.815533980582524,
"grad_norm": 0.9172839522361755,
"learning_rate": 6.15e-06,
"loss": 0.0328,
"step": 1155
},
{
"epoch": 12.815533980582524,
"eval_accuracy": 0.9884821283728978,
"eval_f1": 0.9364348226623675,
"eval_loss": 0.0548846460878849,
"eval_precision": 0.9287345820009136,
"eval_recall": 0.9442638179284719,
"eval_runtime": 36.3929,
"eval_samples_per_second": 5.468,
"eval_steps_per_second": 1.374,
"step": 1155
},
{
"epoch": 12.981969486823855,
"grad_norm": 1.9091347455978394,
"learning_rate": 6.1e-06,
"loss": 0.0298,
"step": 1170
},
{
"epoch": 12.981969486823855,
"eval_accuracy": 0.98945546963716,
"eval_f1": 0.9450092421441775,
"eval_loss": 0.05042650178074837,
"eval_precision": 0.9402298850574713,
"eval_recall": 0.9498374361356247,
"eval_runtime": 35.8371,
"eval_samples_per_second": 5.553,
"eval_steps_per_second": 1.395,
"step": 1170
},
{
"epoch": 13.148404993065187,
"grad_norm": 1.2674860954284668,
"learning_rate": 6.0500000000000005e-06,
"loss": 0.0256,
"step": 1185
},
{
"epoch": 13.148404993065187,
"eval_accuracy": 0.988752500946304,
"eval_f1": 0.9386716037954178,
"eval_loss": 0.051467474550008774,
"eval_precision": 0.9354243542435424,
"eval_recall": 0.9419414770088249,
"eval_runtime": 36.0333,
"eval_samples_per_second": 5.523,
"eval_steps_per_second": 1.388,
"step": 1185
},
{
"epoch": 13.314840499306518,
"grad_norm": 1.406807780265808,
"learning_rate": 6e-06,
"loss": 0.0313,
"step": 1200
},
{
"epoch": 13.314840499306518,
"eval_accuracy": 0.9905369599307846,
"eval_f1": 0.9480968858131489,
"eval_loss": 0.048274096101522446,
"eval_precision": 0.9417965169569202,
"eval_recall": 0.9544821179749187,
"eval_runtime": 35.8422,
"eval_samples_per_second": 5.552,
"eval_steps_per_second": 1.395,
"step": 1200
},
{
"epoch": 13.48127600554785,
"grad_norm": 0.5426374673843384,
"learning_rate": 5.950000000000001e-06,
"loss": 0.022,
"step": 1215
},
{
"epoch": 13.48127600554785,
"eval_accuracy": 0.9898880657546099,
"eval_f1": 0.9445339470655927,
"eval_loss": 0.0463298000395298,
"eval_precision": 0.9361313868613139,
"eval_recall": 0.9530887134231305,
"eval_runtime": 36.2558,
"eval_samples_per_second": 5.489,
"eval_steps_per_second": 1.379,
"step": 1215
},
{
"epoch": 13.647711511789181,
"grad_norm": 2.050182342529297,
"learning_rate": 5.9e-06,
"loss": 0.0245,
"step": 1230
},
{
"epoch": 13.647711511789181,
"eval_accuracy": 0.9893473206077975,
"eval_f1": 0.9430219146482123,
"eval_loss": 0.04942420497536659,
"eval_precision": 0.9367552703941339,
"eval_recall": 0.9493729679516953,
"eval_runtime": 36.4711,
"eval_samples_per_second": 5.456,
"eval_steps_per_second": 1.371,
"step": 1230
},
{
"epoch": 13.814147018030512,
"grad_norm": 1.7617555856704712,
"learning_rate": 5.85e-06,
"loss": 0.0251,
"step": 1245
},
{
"epoch": 13.814147018030512,
"eval_accuracy": 0.9897799167252473,
"eval_f1": 0.9467128027681662,
"eval_loss": 0.049306854605674744,
"eval_precision": 0.9404216315307058,
"eval_recall": 0.9530887134231305,
"eval_runtime": 36.1814,
"eval_samples_per_second": 5.5,
"eval_steps_per_second": 1.382,
"step": 1245
},
{
"epoch": 13.980582524271846,
"grad_norm": 1.183014154434204,
"learning_rate": 5.8e-06,
"loss": 0.0259,
"step": 1260
},
{
"epoch": 13.980582524271846,
"eval_accuracy": 0.98945546963716,
"eval_f1": 0.9453539312889093,
"eval_loss": 0.05114530399441719,
"eval_precision": 0.9386446886446886,
"eval_recall": 0.9521597770552717,
"eval_runtime": 36.0831,
"eval_samples_per_second": 5.515,
"eval_steps_per_second": 1.386,
"step": 1260
},
{
"epoch": 14.147018030513177,
"grad_norm": 0.6956959962844849,
"learning_rate": 5.75e-06,
"loss": 0.03,
"step": 1275
},
{
"epoch": 14.147018030513177,
"eval_accuracy": 0.9888606499756665,
"eval_f1": 0.9399815327793166,
"eval_loss": 0.053482603281736374,
"eval_precision": 0.9343735658558971,
"eval_recall": 0.9456572224802601,
"eval_runtime": 35.8745,
"eval_samples_per_second": 5.547,
"eval_steps_per_second": 1.394,
"step": 1275
},
{
"epoch": 14.313453536754508,
"grad_norm": 1.2064058780670166,
"learning_rate": 5.7e-06,
"loss": 0.0192,
"step": 1290
},
{
"epoch": 14.313453536754508,
"eval_accuracy": 0.9898880657546099,
"eval_f1": 0.9460772969220087,
"eval_loss": 0.049094799906015396,
"eval_precision": 0.9428044280442804,
"eval_recall": 0.9493729679516953,
"eval_runtime": 35.7923,
"eval_samples_per_second": 5.56,
"eval_steps_per_second": 1.397,
"step": 1290
},
{
"epoch": 14.47988904299584,
"grad_norm": 1.727489948272705,
"learning_rate": 5.65e-06,
"loss": 0.0267,
"step": 1305
},
{
"epoch": 14.47988904299584,
"eval_accuracy": 0.9901043638133348,
"eval_f1": 0.9500693481276006,
"eval_loss": 0.04895344376564026,
"eval_precision": 0.9456971928209849,
"eval_recall": 0.9544821179749187,
"eval_runtime": 36.349,
"eval_samples_per_second": 5.475,
"eval_steps_per_second": 1.376,
"step": 1305
},
{
"epoch": 14.646324549237171,
"grad_norm": 0.6142871379852295,
"learning_rate": 5.600000000000001e-06,
"loss": 0.0241,
"step": 1320
},
{
"epoch": 14.646324549237171,
"eval_accuracy": 0.9899421402692911,
"eval_f1": 0.948729792147806,
"eval_loss": 0.050602879375219345,
"eval_precision": 0.9435002296738632,
"eval_recall": 0.9540176497909894,
"eval_runtime": 36.205,
"eval_samples_per_second": 5.496,
"eval_steps_per_second": 1.381,
"step": 1320
},
{
"epoch": 14.812760055478503,
"grad_norm": 1.6362483501434326,
"learning_rate": 5.550000000000001e-06,
"loss": 0.0211,
"step": 1335
},
{
"epoch": 14.812760055478503,
"eval_accuracy": 0.9903206618720597,
"eval_f1": 0.9491682070240296,
"eval_loss": 0.050954435020685196,
"eval_precision": 0.944367816091954,
"eval_recall": 0.9540176497909894,
"eval_runtime": 36.469,
"eval_samples_per_second": 5.457,
"eval_steps_per_second": 1.371,
"step": 1335
},
{
"epoch": 14.979195561719834,
"grad_norm": 0.9267581105232239,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0171,
"step": 1350
},
{
"epoch": 14.979195561719834,
"eval_accuracy": 0.9897799167252473,
"eval_f1": 0.9474412171507607,
"eval_loss": 0.04994847625494003,
"eval_precision": 0.9405034324942791,
"eval_recall": 0.9544821179749187,
"eval_runtime": 36.7159,
"eval_samples_per_second": 5.42,
"eval_steps_per_second": 1.362,
"step": 1350
},
{
"epoch": 15.145631067961165,
"grad_norm": 0.6142176389694214,
"learning_rate": 5.450000000000001e-06,
"loss": 0.0226,
"step": 1365
},
{
"epoch": 15.145631067961165,
"eval_accuracy": 0.9894013951224788,
"eval_f1": 0.9452369995398067,
"eval_loss": 0.05113999918103218,
"eval_precision": 0.9366165070679434,
"eval_recall": 0.9540176497909894,
"eval_runtime": 36.247,
"eval_samples_per_second": 5.49,
"eval_steps_per_second": 1.379,
"step": 1365
},
{
"epoch": 15.312066574202497,
"grad_norm": 0.46341672539711,
"learning_rate": 5.400000000000001e-06,
"loss": 0.024,
"step": 1380
},
{
"epoch": 15.312066574202497,
"eval_accuracy": 0.9899421402692911,
"eval_f1": 0.9501385041551247,
"eval_loss": 0.04835886508226395,
"eval_precision": 0.9444699403396053,
"eval_recall": 0.9558755225267069,
"eval_runtime": 35.8678,
"eval_samples_per_second": 5.548,
"eval_steps_per_second": 1.394,
"step": 1380
},
{
"epoch": 15.478502080443828,
"grad_norm": 1.446049690246582,
"learning_rate": 5.3500000000000004e-06,
"loss": 0.018,
"step": 1395
},
{
"epoch": 15.478502080443828,
"eval_accuracy": 0.9903206618720597,
"eval_f1": 0.9492703266157054,
"eval_loss": 0.04823274910449982,
"eval_precision": 0.9468576709796673,
"eval_recall": 0.9516953088713423,
"eval_runtime": 35.9765,
"eval_samples_per_second": 5.531,
"eval_steps_per_second": 1.39,
"step": 1395
},
{
"epoch": 15.64493758668516,
"grad_norm": 0.7485630512237549,
"learning_rate": 5.300000000000001e-06,
"loss": 0.0191,
"step": 1410
},
{
"epoch": 15.64493758668516,
"eval_accuracy": 0.9899421402692911,
"eval_f1": 0.947709393799167,
"eval_loss": 0.04913439229130745,
"eval_precision": 0.9442139234670355,
"eval_recall": 0.9512308406874129,
"eval_runtime": 36.5589,
"eval_samples_per_second": 5.443,
"eval_steps_per_second": 1.368,
"step": 1410
},
{
"epoch": 15.811373092926491,
"grad_norm": 0.8376514911651611,
"learning_rate": 5.2500000000000006e-06,
"loss": 0.0203,
"step": 1425
},
{
"epoch": 15.811373092926491,
"eval_accuracy": 0.9912399286216407,
"eval_f1": 0.9531974050046339,
"eval_loss": 0.04510456323623657,
"eval_precision": 0.9509939898289412,
"eval_recall": 0.9554110543427775,
"eval_runtime": 36.8157,
"eval_samples_per_second": 5.405,
"eval_steps_per_second": 1.358,
"step": 1425
},
{
"epoch": 15.977808599167822,
"grad_norm": 1.1797449588775635,
"learning_rate": 5.2e-06,
"loss": 0.0198,
"step": 1440
},
{
"epoch": 15.977808599167822,
"eval_accuracy": 0.9911317795922782,
"eval_f1": 0.952292728114868,
"eval_loss": 0.04465332254767418,
"eval_precision": 0.9496535796766744,
"eval_recall": 0.9549465861588481,
"eval_runtime": 36.3506,
"eval_samples_per_second": 5.474,
"eval_steps_per_second": 1.375,
"step": 1440
},
{
"epoch": 16.144244105409154,
"grad_norm": 2.32300066947937,
"learning_rate": 5.150000000000001e-06,
"loss": 0.0167,
"step": 1455
},
{
"epoch": 16.144244105409154,
"eval_accuracy": 0.9909154815335532,
"eval_f1": 0.9513663733209818,
"eval_loss": 0.044419851154088974,
"eval_precision": 0.948729792147806,
"eval_recall": 0.9540176497909894,
"eval_runtime": 36.4511,
"eval_samples_per_second": 5.459,
"eval_steps_per_second": 1.372,
"step": 1455
},
{
"epoch": 16.310679611650485,
"grad_norm": 1.4079307317733765,
"learning_rate": 5.1e-06,
"loss": 0.0178,
"step": 1470
},
{
"epoch": 16.310679611650485,
"eval_accuracy": 0.9891850970637539,
"eval_f1": 0.9448673587081892,
"eval_loss": 0.05134458467364311,
"eval_precision": 0.9385884509624198,
"eval_recall": 0.9512308406874129,
"eval_runtime": 35.9882,
"eval_samples_per_second": 5.53,
"eval_steps_per_second": 1.389,
"step": 1470
},
{
"epoch": 16.477115117891817,
"grad_norm": 1.1276496648788452,
"learning_rate": 5.050000000000001e-06,
"loss": 0.024,
"step": 1485
},
{
"epoch": 16.477115117891817,
"eval_accuracy": 0.9899421402692911,
"eval_f1": 0.9482678983833718,
"eval_loss": 0.0502447672188282,
"eval_precision": 0.9430408819476344,
"eval_recall": 0.9535531816070599,
"eval_runtime": 36.2001,
"eval_samples_per_second": 5.497,
"eval_steps_per_second": 1.381,
"step": 1485
},
{
"epoch": 16.643550624133148,
"grad_norm": 1.1420115232467651,
"learning_rate": 5e-06,
"loss": 0.0206,
"step": 1500
},
{
"epoch": 16.643550624133148,
"eval_accuracy": 0.9907532579895095,
"eval_f1": 0.9513888888888888,
"eval_loss": 0.045851416885852814,
"eval_precision": 0.9483156437471159,
"eval_recall": 0.9544821179749187,
"eval_runtime": 36.0375,
"eval_samples_per_second": 5.522,
"eval_steps_per_second": 1.387,
"step": 1500
},
{
"epoch": 16.80998613037448,
"grad_norm": 0.6803048849105835,
"learning_rate": 4.95e-06,
"loss": 0.0188,
"step": 1515
},
{
"epoch": 16.80998613037448,
"eval_accuracy": 0.9906451089601471,
"eval_f1": 0.9507058551261283,
"eval_loss": 0.04693201929330826,
"eval_precision": 0.9474169741697417,
"eval_recall": 0.9540176497909894,
"eval_runtime": 36.4292,
"eval_samples_per_second": 5.463,
"eval_steps_per_second": 1.373,
"step": 1515
},
{
"epoch": 16.97642163661581,
"grad_norm": 0.6494084000587463,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.016,
"step": 1530
},
{
"epoch": 16.97642163661581,
"eval_accuracy": 0.9905910344454658,
"eval_f1": 0.9524469067405354,
"eval_loss": 0.04632224142551422,
"eval_precision": 0.9467645709040844,
"eval_recall": 0.9581978634463539,
"eval_runtime": 36.8269,
"eval_samples_per_second": 5.404,
"eval_steps_per_second": 1.358,
"step": 1530
},
{
"epoch": 17.142857142857142,
"grad_norm": 0.9313808083534241,
"learning_rate": 4.85e-06,
"loss": 0.0161,
"step": 1545
},
{
"epoch": 17.142857142857142,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9555966697502312,
"eval_loss": 0.045460253953933716,
"eval_precision": 0.9516351911561493,
"eval_recall": 0.9595912679981421,
"eval_runtime": 36.5267,
"eval_samples_per_second": 5.448,
"eval_steps_per_second": 1.369,
"step": 1545
},
{
"epoch": 17.309292649098474,
"grad_norm": 0.6977990865707397,
"learning_rate": 4.800000000000001e-06,
"loss": 0.0135,
"step": 1560
},
{
"epoch": 17.309292649098474,
"eval_accuracy": 0.9909154815335532,
"eval_f1": 0.9548297428769978,
"eval_loss": 0.04745380952954292,
"eval_precision": 0.9524029574861368,
"eval_recall": 0.9572689270784951,
"eval_runtime": 36.396,
"eval_samples_per_second": 5.468,
"eval_steps_per_second": 1.374,
"step": 1560
},
{
"epoch": 17.475728155339805,
"grad_norm": 0.7467624545097351,
"learning_rate": 4.75e-06,
"loss": 0.0148,
"step": 1575
},
{
"epoch": 17.475728155339805,
"eval_accuracy": 0.9904828854161034,
"eval_f1": 0.9491916859122401,
"eval_loss": 0.047850631177425385,
"eval_precision": 0.9439595774000918,
"eval_recall": 0.9544821179749187,
"eval_runtime": 36.2126,
"eval_samples_per_second": 5.495,
"eval_steps_per_second": 1.381,
"step": 1575
},
{
"epoch": 17.642163661581137,
"grad_norm": 0.7804221510887146,
"learning_rate": 4.7e-06,
"loss": 0.0173,
"step": 1590
},
{
"epoch": 17.642163661581137,
"eval_accuracy": 0.9915103011950468,
"eval_f1": 0.9571858366118954,
"eval_loss": 0.04551170393824577,
"eval_precision": 0.9538745387453874,
"eval_recall": 0.9605202043660009,
"eval_runtime": 36.006,
"eval_samples_per_second": 5.527,
"eval_steps_per_second": 1.389,
"step": 1590
},
{
"epoch": 17.808599167822468,
"grad_norm": 1.0907295942306519,
"learning_rate": 4.65e-06,
"loss": 0.0173,
"step": 1605
},
{
"epoch": 17.808599167822468,
"eval_accuracy": 0.9913480776510031,
"eval_f1": 0.9514338575393155,
"eval_loss": 0.04557771980762482,
"eval_precision": 0.9474896361123906,
"eval_recall": 0.9554110543427775,
"eval_runtime": 36.2064,
"eval_samples_per_second": 5.496,
"eval_steps_per_second": 1.381,
"step": 1605
},
{
"epoch": 17.9750346740638,
"grad_norm": 1.295432209968567,
"learning_rate": 4.600000000000001e-06,
"loss": 0.0185,
"step": 1620
},
{
"epoch": 17.9750346740638,
"eval_accuracy": 0.9907532579895095,
"eval_f1": 0.9537465309898243,
"eval_loss": 0.04614636301994324,
"eval_precision": 0.9497927222478121,
"eval_recall": 0.9577333952624245,
"eval_runtime": 36.269,
"eval_samples_per_second": 5.487,
"eval_steps_per_second": 1.379,
"step": 1620
},
{
"epoch": 18.14147018030513,
"grad_norm": 1.0728676319122314,
"learning_rate": 4.5500000000000005e-06,
"loss": 0.0153,
"step": 1635
},
{
"epoch": 18.14147018030513,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9547553093259464,
"eval_loss": 0.04719853028655052,
"eval_precision": 0.9490592014685636,
"eval_recall": 0.9605202043660009,
"eval_runtime": 36.7621,
"eval_samples_per_second": 5.413,
"eval_steps_per_second": 1.36,
"step": 1635
},
{
"epoch": 18.307905686546462,
"grad_norm": 0.848417341709137,
"learning_rate": 4.5e-06,
"loss": 0.0148,
"step": 1650
},
{
"epoch": 18.307905686546462,
"eval_accuracy": 0.9913480776510031,
"eval_f1": 0.9546716003700277,
"eval_loss": 0.04460978880524635,
"eval_precision": 0.9507139567019807,
"eval_recall": 0.9586623316302834,
"eval_runtime": 36.6036,
"eval_samples_per_second": 5.437,
"eval_steps_per_second": 1.366,
"step": 1650
},
{
"epoch": 18.474341192787794,
"grad_norm": 0.8914014101028442,
"learning_rate": 4.450000000000001e-06,
"loss": 0.0136,
"step": 1665
},
{
"epoch": 18.474341192787794,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9542936288088641,
"eval_loss": 0.044093821197748184,
"eval_precision": 0.9486002753556677,
"eval_recall": 0.9600557361820715,
"eval_runtime": 36.4626,
"eval_samples_per_second": 5.458,
"eval_steps_per_second": 1.371,
"step": 1665
},
{
"epoch": 18.640776699029125,
"grad_norm": 1.768336534500122,
"learning_rate": 4.4e-06,
"loss": 0.0185,
"step": 1680
},
{
"epoch": 18.640776699029125,
"eval_accuracy": 0.9914562266803656,
"eval_f1": 0.9550509731232623,
"eval_loss": 0.047818973660469055,
"eval_precision": 0.9528432732316228,
"eval_recall": 0.9572689270784951,
"eval_runtime": 35.9606,
"eval_samples_per_second": 5.534,
"eval_steps_per_second": 1.39,
"step": 1680
},
{
"epoch": 18.807212205270456,
"grad_norm": 0.8891735672950745,
"learning_rate": 4.353333333333334e-06,
"loss": 0.0147,
"step": 1695
},
{
"epoch": 18.807212205270456,
"eval_accuracy": 0.9911858541069594,
"eval_f1": 0.9582660825455385,
"eval_loss": 0.04927229881286621,
"eval_precision": 0.9514652014652014,
"eval_recall": 0.965164886205295,
"eval_runtime": 36.0431,
"eval_samples_per_second": 5.521,
"eval_steps_per_second": 1.387,
"step": 1695
},
{
"epoch": 18.973647711511788,
"grad_norm": 0.860618531703949,
"learning_rate": 4.303333333333334e-06,
"loss": 0.0156,
"step": 1710
},
{
"epoch": 18.973647711511788,
"eval_accuracy": 0.9902665873573785,
"eval_f1": 0.9491916859122401,
"eval_loss": 0.05092372000217438,
"eval_precision": 0.9439595774000918,
"eval_recall": 0.9544821179749187,
"eval_runtime": 36.549,
"eval_samples_per_second": 5.445,
"eval_steps_per_second": 1.368,
"step": 1710
},
{
"epoch": 19.14008321775312,
"grad_norm": 0.4298454821109772,
"learning_rate": 4.253333333333334e-06,
"loss": 0.0113,
"step": 1725
},
{
"epoch": 19.14008321775312,
"eval_accuracy": 0.9911317795922782,
"eval_f1": 0.9566024599675098,
"eval_loss": 0.046022918075323105,
"eval_precision": 0.9559369202226345,
"eval_recall": 0.9572689270784951,
"eval_runtime": 36.802,
"eval_samples_per_second": 5.407,
"eval_steps_per_second": 1.359,
"step": 1725
},
{
"epoch": 19.30651872399445,
"grad_norm": 0.7119155526161194,
"learning_rate": 4.2033333333333335e-06,
"loss": 0.014,
"step": 1740
},
{
"epoch": 19.30651872399445,
"eval_accuracy": 0.9904828854161034,
"eval_f1": 0.948220064724919,
"eval_loss": 0.04928451031446457,
"eval_precision": 0.9438564196962724,
"eval_recall": 0.9526242452392011,
"eval_runtime": 36.4604,
"eval_samples_per_second": 5.458,
"eval_steps_per_second": 1.371,
"step": 1740
},
{
"epoch": 19.472954230235782,
"grad_norm": 0.6270649433135986,
"learning_rate": 4.153333333333334e-06,
"loss": 0.0147,
"step": 1755
},
{
"epoch": 19.472954230235782,
"eval_accuracy": 0.9906451089601471,
"eval_f1": 0.9521608504737693,
"eval_loss": 0.04984944686293602,
"eval_precision": 0.9475620975160993,
"eval_recall": 0.9568044588945657,
"eval_runtime": 36.1824,
"eval_samples_per_second": 5.5,
"eval_steps_per_second": 1.382,
"step": 1755
},
{
"epoch": 19.639389736477114,
"grad_norm": 0.9536636471748352,
"learning_rate": 4.1033333333333336e-06,
"loss": 0.0126,
"step": 1770
},
{
"epoch": 19.639389736477114,
"eval_accuracy": 0.9905910344454658,
"eval_f1": 0.9502199583236861,
"eval_loss": 0.04928808659315109,
"eval_precision": 0.9473684210526315,
"eval_recall": 0.9530887134231305,
"eval_runtime": 36.346,
"eval_samples_per_second": 5.475,
"eval_steps_per_second": 1.376,
"step": 1770
},
{
"epoch": 19.805825242718445,
"grad_norm": 2.24277925491333,
"learning_rate": 4.053333333333333e-06,
"loss": 0.0167,
"step": 1785
},
{
"epoch": 19.805825242718445,
"eval_accuracy": 0.9903747363867409,
"eval_f1": 0.9519852262234534,
"eval_loss": 0.04912427067756653,
"eval_precision": 0.9463056447911886,
"eval_recall": 0.9577333952624245,
"eval_runtime": 36.3829,
"eval_samples_per_second": 5.47,
"eval_steps_per_second": 1.374,
"step": 1785
},
{
"epoch": 19.972260748959776,
"grad_norm": 1.1929985284805298,
"learning_rate": 4.003333333333334e-06,
"loss": 0.0126,
"step": 1800
},
{
"epoch": 19.972260748959776,
"eval_accuracy": 0.9907532579895095,
"eval_f1": 0.9515867500579105,
"eval_loss": 0.04741891101002693,
"eval_precision": 0.9491682070240296,
"eval_recall": 0.9540176497909894,
"eval_runtime": 36.3224,
"eval_samples_per_second": 5.479,
"eval_steps_per_second": 1.377,
"step": 1800
},
{
"epoch": 20.13869625520111,
"grad_norm": 0.5980396866798401,
"learning_rate": 3.953333333333333e-06,
"loss": 0.0107,
"step": 1815
},
{
"epoch": 20.13869625520111,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9550717924965262,
"eval_loss": 0.04617602005600929,
"eval_precision": 0.9524249422632795,
"eval_recall": 0.9577333952624245,
"eval_runtime": 36.2289,
"eval_samples_per_second": 5.493,
"eval_steps_per_second": 1.38,
"step": 1815
},
{
"epoch": 20.305131761442443,
"grad_norm": 0.5774451494216919,
"learning_rate": 3.903333333333334e-06,
"loss": 0.0115,
"step": 1830
},
{
"epoch": 20.305131761442443,
"eval_accuracy": 0.9911317795922782,
"eval_f1": 0.9558993304086816,
"eval_loss": 0.048068635165691376,
"eval_precision": 0.9504132231404959,
"eval_recall": 0.9614491407338597,
"eval_runtime": 36.6091,
"eval_samples_per_second": 5.436,
"eval_steps_per_second": 1.366,
"step": 1830
},
{
"epoch": 20.471567267683774,
"grad_norm": 0.8061049580574036,
"learning_rate": 3.853333333333334e-06,
"loss": 0.0128,
"step": 1845
},
{
"epoch": 20.471567267683774,
"eval_accuracy": 0.9906991834748283,
"eval_f1": 0.951918631530282,
"eval_loss": 0.04859815165400505,
"eval_precision": 0.9475379659456972,
"eval_recall": 0.9563399907106364,
"eval_runtime": 36.2061,
"eval_samples_per_second": 5.496,
"eval_steps_per_second": 1.381,
"step": 1845
},
{
"epoch": 20.638002773925106,
"grad_norm": 0.5735962986946106,
"learning_rate": 3.803333333333334e-06,
"loss": 0.0113,
"step": 1860
},
{
"epoch": 20.638002773925106,
"eval_accuracy": 0.9910236305629156,
"eval_f1": 0.9533702677746998,
"eval_loss": 0.04910165071487427,
"eval_precision": 0.947682423129876,
"eval_recall": 0.9591267998142127,
"eval_runtime": 36.2871,
"eval_samples_per_second": 5.484,
"eval_steps_per_second": 1.378,
"step": 1860
},
{
"epoch": 20.804438280166437,
"grad_norm": 0.5703373551368713,
"learning_rate": 3.753333333333334e-06,
"loss": 0.0119,
"step": 1875
},
{
"epoch": 20.804438280166437,
"eval_accuracy": 0.9901043638133348,
"eval_f1": 0.9498607242339832,
"eval_loss": 0.05141424015164375,
"eval_precision": 0.9494199535962877,
"eval_recall": 0.9503019043195541,
"eval_runtime": 36.2237,
"eval_samples_per_second": 5.494,
"eval_steps_per_second": 1.38,
"step": 1875
},
{
"epoch": 20.97087378640777,
"grad_norm": 0.8812251091003418,
"learning_rate": 3.7033333333333336e-06,
"loss": 0.0122,
"step": 1890
},
{
"epoch": 20.97087378640777,
"eval_accuracy": 0.9911317795922782,
"eval_f1": 0.9535903948279844,
"eval_loss": 0.04799521341919899,
"eval_precision": 0.94811753902663,
"eval_recall": 0.9591267998142127,
"eval_runtime": 36.3818,
"eval_samples_per_second": 5.47,
"eval_steps_per_second": 1.374,
"step": 1890
},
{
"epoch": 21.1373092926491,
"grad_norm": 0.729183554649353,
"learning_rate": 3.6533333333333336e-06,
"loss": 0.0123,
"step": 1905
},
{
"epoch": 21.1373092926491,
"eval_accuracy": 0.9909154815335532,
"eval_f1": 0.9522050334795659,
"eval_loss": 0.04769909009337425,
"eval_precision": 0.9467401285583104,
"eval_recall": 0.9577333952624245,
"eval_runtime": 36.5345,
"eval_samples_per_second": 5.447,
"eval_steps_per_second": 1.369,
"step": 1905
},
{
"epoch": 21.30374479889043,
"grad_norm": 0.3428969085216522,
"learning_rate": 3.6033333333333337e-06,
"loss": 0.0116,
"step": 1920
},
{
"epoch": 21.30374479889043,
"eval_accuracy": 0.9910236305629156,
"eval_f1": 0.9533271719038817,
"eval_loss": 0.04861655458807945,
"eval_precision": 0.9485057471264368,
"eval_recall": 0.9581978634463539,
"eval_runtime": 36.8199,
"eval_samples_per_second": 5.405,
"eval_steps_per_second": 1.358,
"step": 1920
},
{
"epoch": 21.470180305131763,
"grad_norm": 0.4823513925075531,
"learning_rate": 3.5533333333333338e-06,
"loss": 0.0108,
"step": 1935
},
{
"epoch": 21.470180305131763,
"eval_accuracy": 0.9904828854161034,
"eval_f1": 0.9511295527893039,
"eval_loss": 0.048778366297483444,
"eval_precision": 0.9441647597254005,
"eval_recall": 0.9581978634463539,
"eval_runtime": 36.5655,
"eval_samples_per_second": 5.442,
"eval_steps_per_second": 1.367,
"step": 1935
},
{
"epoch": 21.636615811373094,
"grad_norm": 0.3686061203479767,
"learning_rate": 3.5033333333333334e-06,
"loss": 0.0115,
"step": 1950
},
{
"epoch": 21.636615811373094,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9542302357836339,
"eval_loss": 0.04717012122273445,
"eval_precision": 0.9498389323515877,
"eval_recall": 0.9586623316302834,
"eval_runtime": 36.5437,
"eval_samples_per_second": 5.446,
"eval_steps_per_second": 1.368,
"step": 1950
},
{
"epoch": 21.803051317614425,
"grad_norm": 1.0370802879333496,
"learning_rate": 3.4533333333333334e-06,
"loss": 0.0083,
"step": 1965
},
{
"epoch": 21.803051317614425,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.954272517321016,
"eval_loss": 0.04759324714541435,
"eval_precision": 0.9490124023886082,
"eval_recall": 0.9595912679981421,
"eval_runtime": 36.2291,
"eval_samples_per_second": 5.493,
"eval_steps_per_second": 1.38,
"step": 1965
},
{
"epoch": 21.969486823855757,
"grad_norm": 1.2627676725387573,
"learning_rate": 3.4033333333333335e-06,
"loss": 0.0094,
"step": 1980
},
{
"epoch": 21.969486823855757,
"eval_accuracy": 0.990861407018872,
"eval_f1": 0.9543147208121827,
"eval_loss": 0.047525253146886826,
"eval_precision": 0.948188904172398,
"eval_recall": 0.9605202043660009,
"eval_runtime": 36.2268,
"eval_samples_per_second": 5.493,
"eval_steps_per_second": 1.38,
"step": 1980
},
{
"epoch": 22.135922330097088,
"grad_norm": 0.2426026463508606,
"learning_rate": 3.3533333333333336e-06,
"loss": 0.0118,
"step": 1995
},
{
"epoch": 22.135922330097088,
"eval_accuracy": 0.9904288109014222,
"eval_f1": 0.9501154734411085,
"eval_loss": 0.049215689301490784,
"eval_precision": 0.9448782728525493,
"eval_recall": 0.9554110543427775,
"eval_runtime": 36.1992,
"eval_samples_per_second": 5.497,
"eval_steps_per_second": 1.381,
"step": 1995
},
{
"epoch": 22.30235783633842,
"grad_norm": 0.6006263494491577,
"learning_rate": 3.303333333333333e-06,
"loss": 0.01,
"step": 2010
},
{
"epoch": 22.30235783633842,
"eval_accuracy": 0.990861407018872,
"eval_f1": 0.9523148148148148,
"eval_loss": 0.048562802374362946,
"eval_precision": 0.949238578680203,
"eval_recall": 0.9554110543427775,
"eval_runtime": 36.2887,
"eval_samples_per_second": 5.484,
"eval_steps_per_second": 1.378,
"step": 2010
},
{
"epoch": 22.46879334257975,
"grad_norm": 0.7383334040641785,
"learning_rate": 3.2533333333333332e-06,
"loss": 0.0114,
"step": 2025
},
{
"epoch": 22.46879334257975,
"eval_accuracy": 0.9910236305629156,
"eval_f1": 0.9539671524404348,
"eval_loss": 0.04967198148369789,
"eval_precision": 0.9502304147465438,
"eval_recall": 0.9577333952624245,
"eval_runtime": 36.3824,
"eval_samples_per_second": 5.47,
"eval_steps_per_second": 1.374,
"step": 2025
},
{
"epoch": 22.635228848821082,
"grad_norm": 0.5105836987495422,
"learning_rate": 3.2033333333333337e-06,
"loss": 0.0091,
"step": 2040
},
{
"epoch": 22.635228848821082,
"eval_accuracy": 0.9909695560482344,
"eval_f1": 0.954209065679926,
"eval_loss": 0.049895454198122025,
"eval_precision": 0.9502533394748963,
"eval_recall": 0.9581978634463539,
"eval_runtime": 36.2966,
"eval_samples_per_second": 5.483,
"eval_steps_per_second": 1.378,
"step": 2040
},
{
"epoch": 22.801664355062414,
"grad_norm": 0.8460143804550171,
"learning_rate": 3.1533333333333338e-06,
"loss": 0.0077,
"step": 2055
},
{
"epoch": 22.801664355062414,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9563409563409564,
"eval_loss": 0.05023453012108803,
"eval_precision": 0.9512867647058824,
"eval_recall": 0.9614491407338597,
"eval_runtime": 36.5792,
"eval_samples_per_second": 5.44,
"eval_steps_per_second": 1.367,
"step": 2055
},
{
"epoch": 22.968099861303745,
"grad_norm": 0.46876421570777893,
"learning_rate": 3.103333333333334e-06,
"loss": 0.01,
"step": 2070
},
{
"epoch": 22.968099861303745,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9586127167630057,
"eval_loss": 0.05132585018873215,
"eval_precision": 0.9544198895027625,
"eval_recall": 0.9628425452856479,
"eval_runtime": 36.5943,
"eval_samples_per_second": 5.438,
"eval_steps_per_second": 1.366,
"step": 2070
},
{
"epoch": 23.134535367545077,
"grad_norm": 0.26761332154273987,
"learning_rate": 3.053333333333334e-06,
"loss": 0.0087,
"step": 2085
},
{
"epoch": 23.134535367545077,
"eval_accuracy": 0.9911858541069594,
"eval_f1": 0.9554375432925422,
"eval_loss": 0.04853161796927452,
"eval_precision": 0.9499540863177227,
"eval_recall": 0.9609846725499304,
"eval_runtime": 36.2471,
"eval_samples_per_second": 5.49,
"eval_steps_per_second": 1.379,
"step": 2085
},
{
"epoch": 23.300970873786408,
"grad_norm": 0.32841914892196655,
"learning_rate": 3.0033333333333335e-06,
"loss": 0.0073,
"step": 2100
},
{
"epoch": 23.300970873786408,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.959278111985192,
"eval_loss": 0.04846283420920372,
"eval_precision": 0.9557399723374828,
"eval_recall": 0.9628425452856479,
"eval_runtime": 36.0113,
"eval_samples_per_second": 5.526,
"eval_steps_per_second": 1.388,
"step": 2100
},
{
"epoch": 23.46740638002774,
"grad_norm": 0.3114074766635895,
"learning_rate": 2.9533333333333336e-06,
"loss": 0.0083,
"step": 2115
},
{
"epoch": 23.46740638002774,
"eval_accuracy": 0.9913480776510031,
"eval_f1": 0.957205644228545,
"eval_loss": 0.04847896471619606,
"eval_precision": 0.9534562211981567,
"eval_recall": 0.9609846725499304,
"eval_runtime": 36.2766,
"eval_samples_per_second": 5.486,
"eval_steps_per_second": 1.378,
"step": 2115
},
{
"epoch": 23.63384188626907,
"grad_norm": 0.815006673336029,
"learning_rate": 2.9033333333333336e-06,
"loss": 0.0117,
"step": 2130
},
{
"epoch": 23.63384188626907,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9590372598935432,
"eval_loss": 0.04786692187190056,
"eval_precision": 0.955719557195572,
"eval_recall": 0.9623780771017185,
"eval_runtime": 36.5057,
"eval_samples_per_second": 5.451,
"eval_steps_per_second": 1.37,
"step": 2130
},
{
"epoch": 23.800277392510402,
"grad_norm": 0.34551236033439636,
"learning_rate": 2.8533333333333337e-06,
"loss": 0.0095,
"step": 2145
},
{
"epoch": 23.800277392510402,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9542302357836339,
"eval_loss": 0.05084284767508507,
"eval_precision": 0.9498389323515877,
"eval_recall": 0.9586623316302834,
"eval_runtime": 36.5022,
"eval_samples_per_second": 5.452,
"eval_steps_per_second": 1.37,
"step": 2145
},
{
"epoch": 23.966712898751734,
"grad_norm": 0.988761305809021,
"learning_rate": 2.8033333333333333e-06,
"loss": 0.009,
"step": 2160
},
{
"epoch": 23.966712898751734,
"eval_accuracy": 0.9909695560482344,
"eval_f1": 0.9559603412497119,
"eval_loss": 0.051338665187358856,
"eval_precision": 0.9491758241758241,
"eval_recall": 0.9628425452856479,
"eval_runtime": 36.4961,
"eval_samples_per_second": 5.453,
"eval_steps_per_second": 1.37,
"step": 2160
},
{
"epoch": 24.133148404993065,
"grad_norm": 0.20439928770065308,
"learning_rate": 2.7533333333333334e-06,
"loss": 0.0077,
"step": 2175
},
{
"epoch": 24.133148404993065,
"eval_accuracy": 0.9915103011950468,
"eval_f1": 0.9590562109646079,
"eval_loss": 0.050405893474817276,
"eval_precision": 0.9552995391705069,
"eval_recall": 0.9628425452856479,
"eval_runtime": 36.5792,
"eval_samples_per_second": 5.44,
"eval_steps_per_second": 1.367,
"step": 2175
},
{
"epoch": 24.299583911234397,
"grad_norm": 0.6065575480461121,
"learning_rate": 2.7033333333333334e-06,
"loss": 0.0087,
"step": 2190
},
{
"epoch": 24.299583911234397,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9565418400369857,
"eval_loss": 0.04999900609254837,
"eval_precision": 0.9521398987574782,
"eval_recall": 0.9609846725499304,
"eval_runtime": 36.2889,
"eval_samples_per_second": 5.484,
"eval_steps_per_second": 1.378,
"step": 2190
},
{
"epoch": 24.466019417475728,
"grad_norm": 0.4505390226840973,
"learning_rate": 2.6533333333333335e-06,
"loss": 0.0068,
"step": 2205
},
{
"epoch": 24.466019417475728,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9574271170754282,
"eval_loss": 0.05055619403719902,
"eval_precision": 0.9538958045182112,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.8763,
"eval_samples_per_second": 5.547,
"eval_steps_per_second": 1.394,
"step": 2205
},
{
"epoch": 24.63245492371706,
"grad_norm": 0.2784092128276825,
"learning_rate": 2.603333333333334e-06,
"loss": 0.0094,
"step": 2220
},
{
"epoch": 24.63245492371706,
"eval_accuracy": 0.9913480776510031,
"eval_f1": 0.9549132947976879,
"eval_loss": 0.050024211406707764,
"eval_precision": 0.9507366482504604,
"eval_recall": 0.9591267998142127,
"eval_runtime": 36.3659,
"eval_samples_per_second": 5.472,
"eval_steps_per_second": 1.375,
"step": 2220
},
{
"epoch": 24.79889042995839,
"grad_norm": 0.24667127430438995,
"learning_rate": 2.5533333333333336e-06,
"loss": 0.0088,
"step": 2235
},
{
"epoch": 24.79889042995839,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9551548774849746,
"eval_loss": 0.048643559217453,
"eval_precision": 0.9507593189139438,
"eval_recall": 0.9595912679981421,
"eval_runtime": 36.4912,
"eval_samples_per_second": 5.453,
"eval_steps_per_second": 1.37,
"step": 2235
},
{
"epoch": 24.965325936199722,
"grad_norm": 0.10884588211774826,
"learning_rate": 2.5033333333333336e-06,
"loss": 0.0089,
"step": 2250
},
{
"epoch": 24.965325936199722,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9558789558789559,
"eval_loss": 0.05070747807621956,
"eval_precision": 0.9508272058823529,
"eval_recall": 0.9609846725499304,
"eval_runtime": 36.2816,
"eval_samples_per_second": 5.485,
"eval_steps_per_second": 1.378,
"step": 2250
},
{
"epoch": 25.131761442441054,
"grad_norm": 0.6150490641593933,
"learning_rate": 2.4533333333333333e-06,
"loss": 0.0063,
"step": 2265
},
{
"epoch": 25.131761442441054,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9585360203845263,
"eval_loss": 0.04789712280035019,
"eval_precision": 0.9560998151571165,
"eval_recall": 0.9609846725499304,
"eval_runtime": 36.2469,
"eval_samples_per_second": 5.49,
"eval_steps_per_second": 1.379,
"step": 2265
},
{
"epoch": 25.298196948682385,
"grad_norm": 0.7432591319084167,
"learning_rate": 2.4033333333333338e-06,
"loss": 0.0058,
"step": 2280
},
{
"epoch": 25.298196948682385,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9572452045296973,
"eval_loss": 0.050580546259880066,
"eval_precision": 0.952621895124195,
"eval_recall": 0.9619136089177891,
"eval_runtime": 36.4588,
"eval_samples_per_second": 5.458,
"eval_steps_per_second": 1.371,
"step": 2280
},
{
"epoch": 25.464632454923716,
"grad_norm": 0.742586612701416,
"learning_rate": 2.3533333333333334e-06,
"loss": 0.0102,
"step": 2295
},
{
"epoch": 25.464632454923716,
"eval_accuracy": 0.9912399286216407,
"eval_f1": 0.9574861367837338,
"eval_loss": 0.04992222413420677,
"eval_precision": 0.9526436781609195,
"eval_recall": 0.9623780771017185,
"eval_runtime": 36.5208,
"eval_samples_per_second": 5.449,
"eval_steps_per_second": 1.369,
"step": 2295
},
{
"epoch": 25.631067961165048,
"grad_norm": 0.9237321019172668,
"learning_rate": 2.3033333333333334e-06,
"loss": 0.0079,
"step": 2310
},
{
"epoch": 25.631067961165048,
"eval_accuracy": 0.9905369599307846,
"eval_f1": 0.9541368979027426,
"eval_loss": 0.05427027493715286,
"eval_precision": 0.9469350411710887,
"eval_recall": 0.9614491407338597,
"eval_runtime": 36.0226,
"eval_samples_per_second": 5.524,
"eval_steps_per_second": 1.388,
"step": 2310
},
{
"epoch": 25.79750346740638,
"grad_norm": 0.2974264621734619,
"learning_rate": 2.2533333333333335e-06,
"loss": 0.009,
"step": 2325
},
{
"epoch": 25.79750346740638,
"eval_accuracy": 0.9914562266803656,
"eval_f1": 0.9572452045296973,
"eval_loss": 0.049834854900836945,
"eval_precision": 0.952621895124195,
"eval_recall": 0.9619136089177891,
"eval_runtime": 36.6625,
"eval_samples_per_second": 5.428,
"eval_steps_per_second": 1.364,
"step": 2325
},
{
"epoch": 25.96393897364771,
"grad_norm": 0.6791291236877441,
"learning_rate": 2.2033333333333336e-06,
"loss": 0.0068,
"step": 2340
},
{
"epoch": 25.96393897364771,
"eval_accuracy": 0.991077705077597,
"eval_f1": 0.9563611175248211,
"eval_loss": 0.05109778791666031,
"eval_precision": 0.950872359963269,
"eval_recall": 0.9619136089177891,
"eval_runtime": 35.8655,
"eval_samples_per_second": 5.549,
"eval_steps_per_second": 1.394,
"step": 2340
},
{
"epoch": 26.130374479889042,
"grad_norm": 0.5723872184753418,
"learning_rate": 2.153333333333333e-06,
"loss": 0.007,
"step": 2355
},
{
"epoch": 26.130374479889042,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9579676674364895,
"eval_loss": 0.049178168177604675,
"eval_precision": 0.9526871841984382,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.9503,
"eval_samples_per_second": 5.535,
"eval_steps_per_second": 1.391,
"step": 2355
},
{
"epoch": 26.296809986130373,
"grad_norm": 0.3830583393573761,
"learning_rate": 2.1033333333333337e-06,
"loss": 0.0086,
"step": 2370
},
{
"epoch": 26.296809986130373,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9554375432925422,
"eval_loss": 0.05156167596578598,
"eval_precision": 0.9499540863177227,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.8417,
"eval_samples_per_second": 5.552,
"eval_steps_per_second": 1.395,
"step": 2370
},
{
"epoch": 26.463245492371705,
"grad_norm": 0.14329634606838226,
"learning_rate": 2.0533333333333337e-06,
"loss": 0.0078,
"step": 2385
},
{
"epoch": 26.463245492371705,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9556581986143187,
"eval_loss": 0.05027909576892853,
"eval_precision": 0.9503904455672945,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.8472,
"eval_samples_per_second": 5.551,
"eval_steps_per_second": 1.395,
"step": 2385
},
{
"epoch": 26.629680998613036,
"grad_norm": 0.17582757771015167,
"learning_rate": 2.0033333333333334e-06,
"loss": 0.0067,
"step": 2400
},
{
"epoch": 26.629680998613036,
"eval_accuracy": 0.9915103011950468,
"eval_f1": 0.9577269577269578,
"eval_loss": 0.05140436813235283,
"eval_precision": 0.9526654411764706,
"eval_recall": 0.9628425452856479,
"eval_runtime": 35.8691,
"eval_samples_per_second": 5.548,
"eval_steps_per_second": 1.394,
"step": 2400
},
{
"epoch": 26.796116504854368,
"grad_norm": 0.6374102830886841,
"learning_rate": 1.9533333333333334e-06,
"loss": 0.0059,
"step": 2415
},
{
"epoch": 26.796116504854368,
"eval_accuracy": 0.9918888227978154,
"eval_f1": 0.9588344125809436,
"eval_loss": 0.05035752058029175,
"eval_precision": 0.9548595117457392,
"eval_recall": 0.9628425452856479,
"eval_runtime": 35.7794,
"eval_samples_per_second": 5.562,
"eval_steps_per_second": 1.397,
"step": 2415
},
{
"epoch": 26.9625520110957,
"grad_norm": 0.5752395987510681,
"learning_rate": 1.9033333333333335e-06,
"loss": 0.0089,
"step": 2430
},
{
"epoch": 26.9625520110957,
"eval_accuracy": 0.9916184502244092,
"eval_f1": 0.9560795191863154,
"eval_loss": 0.051971472799777985,
"eval_precision": 0.9516797054763001,
"eval_recall": 0.9605202043660009,
"eval_runtime": 36.1279,
"eval_samples_per_second": 5.508,
"eval_steps_per_second": 1.384,
"step": 2430
},
{
"epoch": 27.12898751733703,
"grad_norm": 0.40148672461509705,
"learning_rate": 1.8533333333333333e-06,
"loss": 0.0059,
"step": 2445
},
{
"epoch": 27.12898751733703,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9572649572649573,
"eval_loss": 0.05115849897265434,
"eval_precision": 0.9522058823529411,
"eval_recall": 0.9623780771017185,
"eval_runtime": 36.2324,
"eval_samples_per_second": 5.492,
"eval_steps_per_second": 1.38,
"step": 2445
},
{
"epoch": 27.295423023578362,
"grad_norm": 0.19672174751758575,
"learning_rate": 1.8033333333333336e-06,
"loss": 0.0073,
"step": 2460
},
{
"epoch": 27.295423023578362,
"eval_accuracy": 0.9916184502244092,
"eval_f1": 0.9569842738205366,
"eval_loss": 0.05259764939546585,
"eval_precision": 0.9530170428374021,
"eval_recall": 0.9609846725499304,
"eval_runtime": 36.3768,
"eval_samples_per_second": 5.471,
"eval_steps_per_second": 1.375,
"step": 2460
},
{
"epoch": 27.461858529819693,
"grad_norm": 1.178671956062317,
"learning_rate": 1.7533333333333336e-06,
"loss": 0.0065,
"step": 2475
},
{
"epoch": 27.461858529819693,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9577269577269578,
"eval_loss": 0.052951879799366,
"eval_precision": 0.9526654411764706,
"eval_recall": 0.9628425452856479,
"eval_runtime": 36.7573,
"eval_samples_per_second": 5.414,
"eval_steps_per_second": 1.36,
"step": 2475
},
{
"epoch": 27.628294036061025,
"grad_norm": 0.8156425356864929,
"learning_rate": 1.7033333333333335e-06,
"loss": 0.0064,
"step": 2490
},
{
"epoch": 27.628294036061025,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.957205644228545,
"eval_loss": 0.05146779865026474,
"eval_precision": 0.9534562211981567,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.9308,
"eval_samples_per_second": 5.538,
"eval_steps_per_second": 1.392,
"step": 2490
},
{
"epoch": 27.794729542302356,
"grad_norm": 0.4098323881626129,
"learning_rate": 1.6533333333333335e-06,
"loss": 0.0072,
"step": 2505
},
{
"epoch": 27.794729542302356,
"eval_accuracy": 0.9906991834748283,
"eval_f1": 0.9545559400230681,
"eval_loss": 0.054223690181970596,
"eval_precision": 0.9482126489459212,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.9196,
"eval_samples_per_second": 5.54,
"eval_steps_per_second": 1.392,
"step": 2505
},
{
"epoch": 27.96116504854369,
"grad_norm": 0.5159748792648315,
"learning_rate": 1.6033333333333334e-06,
"loss": 0.0066,
"step": 2520
},
{
"epoch": 27.96116504854369,
"eval_accuracy": 0.990861407018872,
"eval_f1": 0.9549965381952458,
"eval_loss": 0.05374361574649811,
"eval_precision": 0.9490825688073394,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.7031,
"eval_samples_per_second": 5.574,
"eval_steps_per_second": 1.4,
"step": 2520
},
{
"epoch": 28.127600554785023,
"grad_norm": 0.499012291431427,
"learning_rate": 1.5533333333333334e-06,
"loss": 0.006,
"step": 2535
},
{
"epoch": 28.127600554785023,
"eval_accuracy": 0.9915103011950468,
"eval_f1": 0.9579482439926063,
"eval_loss": 0.05182594433426857,
"eval_precision": 0.953103448275862,
"eval_recall": 0.9628425452856479,
"eval_runtime": 35.8174,
"eval_samples_per_second": 5.556,
"eval_steps_per_second": 1.396,
"step": 2535
},
{
"epoch": 28.294036061026354,
"grad_norm": 0.5842483639717102,
"learning_rate": 1.5033333333333337e-06,
"loss": 0.0074,
"step": 2550
},
{
"epoch": 28.294036061026354,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9565418400369857,
"eval_loss": 0.05230095610022545,
"eval_precision": 0.9521398987574782,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.928,
"eval_samples_per_second": 5.539,
"eval_steps_per_second": 1.392,
"step": 2550
},
{
"epoch": 28.460471567267685,
"grad_norm": 0.4897175431251526,
"learning_rate": 1.4533333333333335e-06,
"loss": 0.0068,
"step": 2565
},
{
"epoch": 28.460471567267685,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.955458112162474,
"eval_loss": 0.05341142788529396,
"eval_precision": 0.9495412844036697,
"eval_recall": 0.9614491407338597,
"eval_runtime": 36.0278,
"eval_samples_per_second": 5.524,
"eval_steps_per_second": 1.388,
"step": 2565
},
{
"epoch": 28.626907073509017,
"grad_norm": 0.4191240668296814,
"learning_rate": 1.4033333333333336e-06,
"loss": 0.0055,
"step": 2580
},
{
"epoch": 28.626907073509017,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9583526145303101,
"eval_loss": 0.05210199952125549,
"eval_precision": 0.954817888427847,
"eval_recall": 0.9619136089177891,
"eval_runtime": 36.2636,
"eval_samples_per_second": 5.488,
"eval_steps_per_second": 1.379,
"step": 2580
},
{
"epoch": 28.793342579750348,
"grad_norm": 0.6655350923538208,
"learning_rate": 1.3533333333333334e-06,
"loss": 0.0056,
"step": 2595
},
{
"epoch": 28.793342579750348,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9567829905246129,
"eval_loss": 0.05259960889816284,
"eval_precision": 0.952161913523459,
"eval_recall": 0.9614491407338597,
"eval_runtime": 36.0456,
"eval_samples_per_second": 5.521,
"eval_steps_per_second": 1.387,
"step": 2595
},
{
"epoch": 28.95977808599168,
"grad_norm": 0.9510291814804077,
"learning_rate": 1.3033333333333335e-06,
"loss": 0.0066,
"step": 2610
},
{
"epoch": 28.95977808599168,
"eval_accuracy": 0.9913480776510031,
"eval_f1": 0.9570240295748613,
"eval_loss": 0.05272991955280304,
"eval_precision": 0.952183908045977,
"eval_recall": 0.9619136089177891,
"eval_runtime": 36.3753,
"eval_samples_per_second": 5.471,
"eval_steps_per_second": 1.375,
"step": 2610
},
{
"epoch": 29.12621359223301,
"grad_norm": 0.33463072776794434,
"learning_rate": 1.2533333333333333e-06,
"loss": 0.0053,
"step": 2625
},
{
"epoch": 29.12621359223301,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9579482439926063,
"eval_loss": 0.0533275306224823,
"eval_precision": 0.953103448275862,
"eval_recall": 0.9628425452856479,
"eval_runtime": 35.8945,
"eval_samples_per_second": 5.544,
"eval_steps_per_second": 1.393,
"step": 2625
},
{
"epoch": 29.292649098474342,
"grad_norm": 0.2936910092830658,
"learning_rate": 1.2033333333333334e-06,
"loss": 0.0063,
"step": 2640
},
{
"epoch": 29.292649098474342,
"eval_accuracy": 0.9912940031363219,
"eval_f1": 0.9569842738205366,
"eval_loss": 0.05200694501399994,
"eval_precision": 0.9530170428374021,
"eval_recall": 0.9609846725499304,
"eval_runtime": 35.7745,
"eval_samples_per_second": 5.563,
"eval_steps_per_second": 1.398,
"step": 2640
},
{
"epoch": 29.459084604715674,
"grad_norm": 0.45608168840408325,
"learning_rate": 1.1533333333333334e-06,
"loss": 0.0059,
"step": 2655
},
{
"epoch": 29.459084604715674,
"eval_accuracy": 0.9910236305629156,
"eval_f1": 0.9554169554169554,
"eval_loss": 0.0532723143696785,
"eval_precision": 0.9503676470588235,
"eval_recall": 0.9605202043660009,
"eval_runtime": 35.9196,
"eval_samples_per_second": 5.54,
"eval_steps_per_second": 1.392,
"step": 2655
},
{
"epoch": 29.625520110957005,
"grad_norm": 0.46974512934684753,
"learning_rate": 1.1033333333333335e-06,
"loss": 0.0059,
"step": 2670
},
{
"epoch": 29.625520110957005,
"eval_accuracy": 0.9911858541069594,
"eval_f1": 0.9572452045296973,
"eval_loss": 0.05324824899435043,
"eval_precision": 0.952621895124195,
"eval_recall": 0.9619136089177891,
"eval_runtime": 36.0296,
"eval_samples_per_second": 5.523,
"eval_steps_per_second": 1.388,
"step": 2670
},
{
"epoch": 29.791955617198337,
"grad_norm": 0.6280196309089661,
"learning_rate": 1.0533333333333333e-06,
"loss": 0.0062,
"step": 2685
},
{
"epoch": 29.791955617198337,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9579288025889968,
"eval_loss": 0.05163406580686569,
"eval_precision": 0.9535204786010124,
"eval_recall": 0.9623780771017185,
"eval_runtime": 35.8797,
"eval_samples_per_second": 5.546,
"eval_steps_per_second": 1.394,
"step": 2685
},
{
"epoch": 29.958391123439668,
"grad_norm": 0.3609830439090729,
"learning_rate": 1.0033333333333334e-06,
"loss": 0.0064,
"step": 2700
},
{
"epoch": 29.958391123439668,
"eval_accuracy": 0.9914562266803656,
"eval_f1": 0.9572649572649573,
"eval_loss": 0.05152719095349312,
"eval_precision": 0.9522058823529411,
"eval_recall": 0.9623780771017185,
"eval_runtime": 36.0059,
"eval_samples_per_second": 5.527,
"eval_steps_per_second": 1.389,
"step": 2700
},
{
"epoch": 30.124826629681,
"grad_norm": 0.37590721249580383,
"learning_rate": 9.533333333333335e-07,
"loss": 0.0055,
"step": 2715
},
{
"epoch": 30.124826629681,
"eval_accuracy": 0.9917265992537717,
"eval_f1": 0.9590751445086704,
"eval_loss": 0.05128318816423416,
"eval_precision": 0.9548802946593001,
"eval_recall": 0.9633070134695774,
"eval_runtime": 36.0097,
"eval_samples_per_second": 5.526,
"eval_steps_per_second": 1.389,
"step": 2715
},
{
"epoch": 30.29126213592233,
"grad_norm": 0.4574069678783417,
"learning_rate": 9.033333333333334e-07,
"loss": 0.0064,
"step": 2730
},
{
"epoch": 30.29126213592233,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9583911234396673,
"eval_loss": 0.052385713905096054,
"eval_precision": 0.9539806718821905,
"eval_recall": 0.9628425452856479,
"eval_runtime": 35.8265,
"eval_samples_per_second": 5.555,
"eval_steps_per_second": 1.396,
"step": 2730
},
{
"epoch": 30.457697642163662,
"grad_norm": 1.509279489517212,
"learning_rate": 8.533333333333334e-07,
"loss": 0.0055,
"step": 2745
},
{
"epoch": 30.457697642163662,
"eval_accuracy": 0.9915103011950468,
"eval_f1": 0.9581889581889582,
"eval_loss": 0.05304015427827835,
"eval_precision": 0.953125,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.8068,
"eval_samples_per_second": 5.558,
"eval_steps_per_second": 1.396,
"step": 2745
},
{
"epoch": 30.624133148404994,
"grad_norm": 0.08701591938734055,
"learning_rate": 8.033333333333335e-07,
"loss": 0.0065,
"step": 2760
},
{
"epoch": 30.624133148404994,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9588914549653579,
"eval_loss": 0.05279012396931648,
"eval_precision": 0.9536058796508957,
"eval_recall": 0.9642359498374361,
"eval_runtime": 36.0763,
"eval_samples_per_second": 5.516,
"eval_steps_per_second": 1.386,
"step": 2760
},
{
"epoch": 30.790568654646325,
"grad_norm": 0.39128488302230835,
"learning_rate": 7.533333333333335e-07,
"loss": 0.0068,
"step": 2775
},
{
"epoch": 30.790568654646325,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9575253924284395,
"eval_loss": 0.05296061187982559,
"eval_precision": 0.9518127581459385,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.9916,
"eval_samples_per_second": 5.529,
"eval_steps_per_second": 1.389,
"step": 2775
},
{
"epoch": 30.957004160887656,
"grad_norm": 0.20628976821899414,
"learning_rate": 7.033333333333334e-07,
"loss": 0.0047,
"step": 2790
},
{
"epoch": 30.957004160887656,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.958910433979686,
"eval_loss": 0.05448687821626663,
"eval_precision": 0.953189536484626,
"eval_recall": 0.9647004180213655,
"eval_runtime": 35.9295,
"eval_samples_per_second": 5.539,
"eval_steps_per_second": 1.392,
"step": 2790
},
{
"epoch": 31.123439667128988,
"grad_norm": 0.3910321295261383,
"learning_rate": 6.533333333333334e-07,
"loss": 0.0051,
"step": 2805
},
{
"epoch": 31.123439667128988,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9595749595749595,
"eval_loss": 0.05336242541670799,
"eval_precision": 0.9545036764705882,
"eval_recall": 0.9647004180213655,
"eval_runtime": 36.0288,
"eval_samples_per_second": 5.523,
"eval_steps_per_second": 1.388,
"step": 2805
},
{
"epoch": 31.28987517337032,
"grad_norm": 0.2049601525068283,
"learning_rate": 6.033333333333334e-07,
"loss": 0.0044,
"step": 2820
},
{
"epoch": 31.28987517337032,
"eval_accuracy": 0.9914021521656843,
"eval_f1": 0.9581889581889582,
"eval_loss": 0.053161416202783585,
"eval_precision": 0.953125,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.9772,
"eval_samples_per_second": 5.531,
"eval_steps_per_second": 1.39,
"step": 2820
},
{
"epoch": 31.45631067961165,
"grad_norm": 0.4429149329662323,
"learning_rate": 5.533333333333334e-07,
"loss": 0.0068,
"step": 2835
},
{
"epoch": 31.45631067961165,
"eval_accuracy": 0.9913480776510031,
"eval_f1": 0.9579676674364895,
"eval_loss": 0.05317556858062744,
"eval_precision": 0.9526871841984382,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.6808,
"eval_samples_per_second": 5.577,
"eval_steps_per_second": 1.401,
"step": 2835
},
{
"epoch": 31.622746185852982,
"grad_norm": 0.4102032482624054,
"learning_rate": 5.033333333333334e-07,
"loss": 0.0045,
"step": 2850
},
{
"epoch": 31.622746185852982,
"eval_accuracy": 0.9915103011950468,
"eval_f1": 0.9590940605500345,
"eval_loss": 0.053103264421224594,
"eval_precision": 0.9544618215271389,
"eval_recall": 0.9637714816535068,
"eval_runtime": 35.706,
"eval_samples_per_second": 5.573,
"eval_steps_per_second": 1.4,
"step": 2850
},
{
"epoch": 31.789181692094314,
"grad_norm": 0.8468719720840454,
"learning_rate": 4.533333333333334e-07,
"loss": 0.0047,
"step": 2865
},
{
"epoch": 31.789181692094314,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9586318465449504,
"eval_loss": 0.05298003926873207,
"eval_precision": 0.954001839926403,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.8749,
"eval_samples_per_second": 5.547,
"eval_steps_per_second": 1.394,
"step": 2865
},
{
"epoch": 31.955617198335645,
"grad_norm": 0.2063705176115036,
"learning_rate": 4.0333333333333337e-07,
"loss": 0.0075,
"step": 2880
},
{
"epoch": 31.955617198335645,
"eval_accuracy": 0.9916184502244092,
"eval_f1": 0.9593157651410079,
"eval_loss": 0.05329431965947151,
"eval_precision": 0.9549010584445468,
"eval_recall": 0.9637714816535068,
"eval_runtime": 36.0809,
"eval_samples_per_second": 5.515,
"eval_steps_per_second": 1.386,
"step": 2880
},
{
"epoch": 32.12205270457698,
"grad_norm": 0.3478763997554779,
"learning_rate": 3.533333333333334e-07,
"loss": 0.0055,
"step": 2895
},
{
"epoch": 32.12205270457698,
"eval_accuracy": 0.9917265992537717,
"eval_f1": 0.9595375722543353,
"eval_loss": 0.05245138704776764,
"eval_precision": 0.9553406998158379,
"eval_recall": 0.9637714816535068,
"eval_runtime": 35.9673,
"eval_samples_per_second": 5.533,
"eval_steps_per_second": 1.39,
"step": 2895
},
{
"epoch": 32.28848821081831,
"grad_norm": 0.721191942691803,
"learning_rate": 3.033333333333334e-07,
"loss": 0.006,
"step": 2910
},
{
"epoch": 32.28848821081831,
"eval_accuracy": 0.9917265992537717,
"eval_f1": 0.9595375722543353,
"eval_loss": 0.05226488783955574,
"eval_precision": 0.9553406998158379,
"eval_recall": 0.9637714816535068,
"eval_runtime": 35.9385,
"eval_samples_per_second": 5.537,
"eval_steps_per_second": 1.391,
"step": 2910
},
{
"epoch": 32.45492371705964,
"grad_norm": 0.3022706210613251,
"learning_rate": 2.533333333333333e-07,
"loss": 0.0062,
"step": 2925
},
{
"epoch": 32.45492371705964,
"eval_accuracy": 0.9916725247390905,
"eval_f1": 0.9588534442903375,
"eval_loss": 0.05245348811149597,
"eval_precision": 0.9544408651633686,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.8766,
"eval_samples_per_second": 5.547,
"eval_steps_per_second": 1.394,
"step": 2925
},
{
"epoch": 32.62135922330097,
"grad_norm": 0.4700392186641693,
"learning_rate": 2.0333333333333333e-07,
"loss": 0.0059,
"step": 2940
},
{
"epoch": 32.62135922330097,
"eval_accuracy": 0.9917265992537717,
"eval_f1": 0.9593157651410079,
"eval_loss": 0.05246575176715851,
"eval_precision": 0.9549010584445468,
"eval_recall": 0.9637714816535068,
"eval_runtime": 35.8779,
"eval_samples_per_second": 5.547,
"eval_steps_per_second": 1.394,
"step": 2940
},
{
"epoch": 32.787794729542306,
"grad_norm": 0.7413909435272217,
"learning_rate": 1.5333333333333333e-07,
"loss": 0.0058,
"step": 2955
},
{
"epoch": 32.787794729542306,
"eval_accuracy": 0.9917265992537717,
"eval_f1": 0.959556274555119,
"eval_loss": 0.053051915019750595,
"eval_precision": 0.9549218031278749,
"eval_recall": 0.9642359498374361,
"eval_runtime": 35.8838,
"eval_samples_per_second": 5.546,
"eval_steps_per_second": 1.393,
"step": 2955
},
{
"epoch": 32.95423023578363,
"grad_norm": 0.7399964332580566,
"learning_rate": 1.0333333333333335e-07,
"loss": 0.005,
"step": 2970
},
{
"epoch": 32.95423023578363,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9584103512014789,
"eval_loss": 0.05329006537795067,
"eval_precision": 0.9535632183908046,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.9193,
"eval_samples_per_second": 5.54,
"eval_steps_per_second": 1.392,
"step": 2970
},
{
"epoch": 33.12066574202497,
"grad_norm": 0.3113914728164673,
"learning_rate": 5.3333333333333334e-08,
"loss": 0.007,
"step": 2985
},
{
"epoch": 33.12066574202497,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9584103512014789,
"eval_loss": 0.05327802523970604,
"eval_precision": 0.9535632183908046,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.914,
"eval_samples_per_second": 5.541,
"eval_steps_per_second": 1.392,
"step": 2985
},
{
"epoch": 33.287101248266296,
"grad_norm": 0.33092889189720154,
"learning_rate": 3.3333333333333334e-09,
"loss": 0.0047,
"step": 3000
},
{
"epoch": 33.287101248266296,
"eval_accuracy": 0.991564375709728,
"eval_f1": 0.9584103512014789,
"eval_loss": 0.05324762314558029,
"eval_precision": 0.9535632183908046,
"eval_recall": 0.9633070134695774,
"eval_runtime": 35.8608,
"eval_samples_per_second": 5.549,
"eval_steps_per_second": 1.394,
"step": 3000
},
{
"epoch": 33.287101248266296,
"step": 3000,
"total_flos": 8.9780255686656e+16,
"train_loss": 0.0764370101193587,
"train_runtime": 55223.9534,
"train_samples_per_second": 1.738,
"train_steps_per_second": 0.054
}
],
"logging_steps": 15,
"max_steps": 3000,
"num_input_tokens_seen": 0,
"num_train_epochs": 34,
"save_steps": 15,
"total_flos": 8.9780255686656e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}