{ "best_metric": 0.9595749595749595, "best_model_checkpoint": "ds-v6-large/checkpoint-2805", "epoch": 33.287101248266296, "eval_steps": 15, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1664355062413315, "grad_norm": 2.6665048599243164, "learning_rate": 9.950000000000001e-06, "loss": 1.9852, "step": 15 }, { "epoch": 0.1664355062413315, "eval_accuracy": 0.8101443789541989, "eval_f1": 0.0, "eval_loss": 1.1499630212783813, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 35.775, "eval_samples_per_second": 5.563, "eval_steps_per_second": 1.398, "step": 15 }, { "epoch": 0.332871012482663, "grad_norm": 2.08683443069458, "learning_rate": 9.9e-06, "loss": 1.0244, "step": 30 }, { "epoch": 0.332871012482663, "eval_accuracy": 0.8122532850267669, "eval_f1": 0.012939749292357462, "eval_loss": 0.834208607673645, "eval_precision": 0.05, "eval_recall": 0.0074314909428704135, "eval_runtime": 35.1999, "eval_samples_per_second": 5.653, "eval_steps_per_second": 1.42, "step": 30 }, { "epoch": 0.49930651872399445, "grad_norm": 2.1074297428131104, "learning_rate": 9.85e-06, "loss": 0.7826, "step": 45 }, { "epoch": 0.49930651872399445, "eval_accuracy": 0.8479424647163791, "eval_f1": 0.09320905459387482, "eval_loss": 0.6794766187667847, "eval_precision": 0.07893041237113402, "eval_recall": 0.1137947050627032, "eval_runtime": 35.557, "eval_samples_per_second": 5.597, "eval_steps_per_second": 1.406, "step": 45 }, { "epoch": 0.665742024965326, "grad_norm": 1.8543498516082764, "learning_rate": 9.800000000000001e-06, "loss": 0.6767, "step": 60 }, { "epoch": 0.665742024965326, "eval_accuracy": 0.8578381009030444, "eval_f1": 0.13828125, "eval_loss": 0.5963338613510132, "eval_precision": 0.11931243680485339, "eval_recall": 0.1644217371110079, "eval_runtime": 35.7897, "eval_samples_per_second": 5.56, "eval_steps_per_second": 1.397, "step": 60 }, { "epoch": 0.8321775312066574, "grad_norm": 1.9123793840408325, "learning_rate": 9.75e-06, "loss": 0.6031, "step": 75 }, { "epoch": 0.8321775312066574, "eval_accuracy": 0.8690855999567404, "eval_f1": 0.1916831683168317, "eval_loss": 0.5405648946762085, "eval_precision": 0.1670693821194339, "eval_recall": 0.22480260102183, "eval_runtime": 35.6034, "eval_samples_per_second": 5.589, "eval_steps_per_second": 1.404, "step": 75 }, { "epoch": 0.9986130374479889, "grad_norm": 1.9384328126907349, "learning_rate": 9.7e-06, "loss": 0.5756, "step": 90 }, { "epoch": 0.9986130374479889, "eval_accuracy": 0.8777915968204185, "eval_f1": 0.26393539491825885, "eval_loss": 0.49346938729286194, "eval_precision": 0.22913816689466485, "eval_recall": 0.3111936832326986, "eval_runtime": 35.6547, "eval_samples_per_second": 5.581, "eval_steps_per_second": 1.402, "step": 90 }, { "epoch": 1.1650485436893203, "grad_norm": 1.751382827758789, "learning_rate": 9.65e-06, "loss": 0.5215, "step": 105 }, { "epoch": 1.1650485436893203, "eval_accuracy": 0.8904991077705078, "eval_f1": 0.3575184016824396, "eval_loss": 0.43015486001968384, "eval_precision": 0.32667179093005383, "eval_recall": 0.3947979563399907, "eval_runtime": 35.5042, "eval_samples_per_second": 5.605, "eval_steps_per_second": 1.408, "step": 105 }, { "epoch": 1.331484049930652, "grad_norm": 1.7430224418640137, "learning_rate": 9.600000000000001e-06, "loss": 0.4782, "step": 120 }, { "epoch": 1.331484049930652, "eval_accuracy": 0.9020169793976099, "eval_f1": 0.4266553119012136, "eval_loss": 0.37819600105285645, "eval_precision": 0.3938679245283019, "eval_recall": 0.46539712029725966, "eval_runtime": 35.9551, "eval_samples_per_second": 5.535, "eval_steps_per_second": 1.391, "step": 120 }, { "epoch": 1.4979195561719834, "grad_norm": 2.754100799560547, "learning_rate": 9.55e-06, "loss": 0.4208, "step": 135 }, { "epoch": 1.4979195561719834, "eval_accuracy": 0.9080733250419077, "eval_f1": 0.44783505154639175, "eval_loss": 0.34046444296836853, "eval_precision": 0.40266963292547275, "eval_recall": 0.5044124477473293, "eval_runtime": 35.1015, "eval_samples_per_second": 5.669, "eval_steps_per_second": 1.424, "step": 135 }, { "epoch": 1.664355062413315, "grad_norm": 1.3271350860595703, "learning_rate": 9.5e-06, "loss": 0.3532, "step": 150 }, { "epoch": 1.664355062413315, "eval_accuracy": 0.9251608716811767, "eval_f1": 0.5355845266082496, "eval_loss": 0.2930045425891876, "eval_precision": 0.49604117181314333, "eval_recall": 0.5819786344635393, "eval_runtime": 34.7775, "eval_samples_per_second": 5.722, "eval_steps_per_second": 1.438, "step": 150 }, { "epoch": 1.8307905686546464, "grad_norm": 1.9117140769958496, "learning_rate": 9.450000000000001e-06, "loss": 0.3458, "step": 165 }, { "epoch": 1.8307905686546464, "eval_accuracy": 0.9301357270318499, "eval_f1": 0.5559597688850845, "eval_loss": 0.2658008933067322, "eval_precision": 0.5154761904761904, "eval_recall": 0.6033441709242917, "eval_runtime": 34.9787, "eval_samples_per_second": 5.689, "eval_steps_per_second": 1.429, "step": 165 }, { "epoch": 1.9972260748959778, "grad_norm": 1.9700042009353638, "learning_rate": 9.4e-06, "loss": 0.302, "step": 180 }, { "epoch": 1.9972260748959778, "eval_accuracy": 0.9474395717298437, "eval_f1": 0.6529640848117698, "eval_loss": 0.2320590764284134, "eval_precision": 0.6111786148238153, "eval_recall": 0.7008824895494659, "eval_runtime": 35.1938, "eval_samples_per_second": 5.654, "eval_steps_per_second": 1.421, "step": 180 }, { "epoch": 2.163661581137309, "grad_norm": 1.2119841575622559, "learning_rate": 9.350000000000002e-06, "loss": 0.2655, "step": 195 }, { "epoch": 2.163661581137309, "eval_accuracy": 0.9519818309630671, "eval_f1": 0.6844638949671772, "eval_loss": 0.20933493971824646, "eval_precision": 0.6470831609433182, "eval_recall": 0.7264282396655829, "eval_runtime": 35.2683, "eval_samples_per_second": 5.642, "eval_steps_per_second": 1.418, "step": 195 }, { "epoch": 2.3300970873786406, "grad_norm": 2.050490617752075, "learning_rate": 9.3e-06, "loss": 0.2598, "step": 210 }, { "epoch": 2.3300970873786406, "eval_accuracy": 0.9570107608284215, "eval_f1": 0.7274759669125868, "eval_loss": 0.1951305866241455, "eval_precision": 0.7012931034482759, "eval_recall": 0.7556897352531352, "eval_runtime": 35.2296, "eval_samples_per_second": 5.649, "eval_steps_per_second": 1.419, "step": 210 }, { "epoch": 2.496532593619972, "grad_norm": 2.1060705184936523, "learning_rate": 9.250000000000001e-06, "loss": 0.2364, "step": 225 }, { "epoch": 2.496532593619972, "eval_accuracy": 0.9590115178716271, "eval_f1": 0.7402309058614565, "eval_loss": 0.17936836183071136, "eval_precision": 0.7090599744789451, "eval_recall": 0.7742684626103112, "eval_runtime": 35.4911, "eval_samples_per_second": 5.607, "eval_steps_per_second": 1.409, "step": 225 }, { "epoch": 2.662968099861304, "grad_norm": 1.8435375690460205, "learning_rate": 9.200000000000002e-06, "loss": 0.2218, "step": 240 }, { "epoch": 2.662968099861304, "eval_accuracy": 0.9621478397231384, "eval_f1": 0.7557522123893805, "eval_loss": 0.1675911545753479, "eval_precision": 0.721588508660752, "eval_recall": 0.7933116581514166, "eval_runtime": 35.3833, "eval_samples_per_second": 5.624, "eval_steps_per_second": 1.413, "step": 240 }, { "epoch": 2.8294036061026353, "grad_norm": 2.065732479095459, "learning_rate": 9.15e-06, "loss": 0.206, "step": 255 }, { "epoch": 2.8294036061026353, "eval_accuracy": 0.9650137890012437, "eval_f1": 0.7758275938680294, "eval_loss": 0.15723256766796112, "eval_precision": 0.7436115843270868, "eval_recall": 0.8109614491407339, "eval_runtime": 35.4693, "eval_samples_per_second": 5.61, "eval_steps_per_second": 1.41, "step": 255 }, { "epoch": 2.9958391123439667, "grad_norm": 2.1758480072021484, "learning_rate": 9.100000000000001e-06, "loss": 0.2053, "step": 270 }, { "epoch": 2.9958391123439667, "eval_accuracy": 0.9640404477369816, "eval_f1": 0.7730088495575222, "eval_loss": 0.15795043110847473, "eval_precision": 0.7380650612589776, "eval_recall": 0.8114259173246633, "eval_runtime": 35.4002, "eval_samples_per_second": 5.621, "eval_steps_per_second": 1.412, "step": 270 }, { "epoch": 3.162274618585298, "grad_norm": 1.6404600143432617, "learning_rate": 9.050000000000001e-06, "loss": 0.1876, "step": 285 }, { "epoch": 3.162274618585298, "eval_accuracy": 0.9687449305142486, "eval_f1": 0.801343784994401, "eval_loss": 0.1406078040599823, "eval_precision": 0.7737889273356401, "eval_recall": 0.8309335810496981, "eval_runtime": 35.6674, "eval_samples_per_second": 5.579, "eval_steps_per_second": 1.402, "step": 285 }, { "epoch": 3.3287101248266295, "grad_norm": 1.8430469036102295, "learning_rate": 9e-06, "loss": 0.1602, "step": 300 }, { "epoch": 3.3287101248266295, "eval_accuracy": 0.9670686205591305, "eval_f1": 0.7985659870042572, "eval_loss": 0.14204147458076477, "eval_precision": 0.7714285714285715, "eval_recall": 0.8276823037621923, "eval_runtime": 35.5741, "eval_samples_per_second": 5.594, "eval_steps_per_second": 1.406, "step": 300 }, { "epoch": 3.4951456310679614, "grad_norm": 2.2237956523895264, "learning_rate": 8.95e-06, "loss": 0.1706, "step": 315 }, { "epoch": 3.4951456310679614, "eval_accuracy": 0.969069377602336, "eval_f1": 0.8149988705669754, "eval_loss": 0.13229934871196747, "eval_precision": 0.793315743183817, "eval_recall": 0.8379006038086391, "eval_runtime": 36.0972, "eval_samples_per_second": 5.513, "eval_steps_per_second": 1.385, "step": 315 }, { "epoch": 3.661581137309293, "grad_norm": 2.04622220993042, "learning_rate": 8.900000000000001e-06, "loss": 0.1585, "step": 330 }, { "epoch": 3.661581137309293, "eval_accuracy": 0.9700427188665982, "eval_f1": 0.8298399819697994, "eval_loss": 0.13131560385227203, "eval_precision": 0.8060420315236427, "eval_recall": 0.8550859266140269, "eval_runtime": 35.6467, "eval_samples_per_second": 5.583, "eval_steps_per_second": 1.403, "step": 330 }, { "epoch": 3.828016643550624, "grad_norm": 2.0790255069732666, "learning_rate": 8.85e-06, "loss": 0.1574, "step": 345 }, { "epoch": 3.828016643550624, "eval_accuracy": 0.9717190288217163, "eval_f1": 0.8376491781130375, "eval_loss": 0.12674090266227722, "eval_precision": 0.8129370629370629, "eval_recall": 0.8639108221086855, "eval_runtime": 35.5835, "eval_samples_per_second": 5.592, "eval_steps_per_second": 1.405, "step": 345 }, { "epoch": 3.9944521497919556, "grad_norm": 2.3372180461883545, "learning_rate": 8.8e-06, "loss": 0.15, "step": 360 }, { "epoch": 3.9944521497919556, "eval_accuracy": 0.97539609582004, "eval_f1": 0.8535811423390752, "eval_loss": 0.11569273471832275, "eval_precision": 0.8335546702080566, "eval_recall": 0.8745935903390618, "eval_runtime": 35.7391, "eval_samples_per_second": 5.568, "eval_steps_per_second": 1.399, "step": 360 }, { "epoch": 4.160887656033287, "grad_norm": 2.763075828552246, "learning_rate": 8.750000000000001e-06, "loss": 0.1192, "step": 375 }, { "epoch": 4.160887656033287, "eval_accuracy": 0.9740983074676904, "eval_f1": 0.8524664696521937, "eval_loss": 0.11200679838657379, "eval_precision": 0.8348174532502226, "eval_recall": 0.8708778448676265, "eval_runtime": 36.0008, "eval_samples_per_second": 5.528, "eval_steps_per_second": 1.389, "step": 375 }, { "epoch": 4.327323162274618, "grad_norm": 1.7937551736831665, "learning_rate": 8.700000000000001e-06, "loss": 0.1313, "step": 390 }, { "epoch": 4.327323162274618, "eval_accuracy": 0.9745309035851403, "eval_f1": 0.8588929219600727, "eval_loss": 0.1129654049873352, "eval_precision": 0.8394678492239468, "eval_recall": 0.8792382721783558, "eval_runtime": 36.0013, "eval_samples_per_second": 5.528, "eval_steps_per_second": 1.389, "step": 390 }, { "epoch": 4.49375866851595, "grad_norm": 1.4142848253250122, "learning_rate": 8.65e-06, "loss": 0.1179, "step": 405 }, { "epoch": 4.49375866851595, "eval_accuracy": 0.9755042448494025, "eval_f1": 0.8613303269447576, "eval_loss": 0.109279565513134, "eval_precision": 0.8369851007887817, "eval_recall": 0.8871342313051556, "eval_runtime": 36.0609, "eval_samples_per_second": 5.518, "eval_steps_per_second": 1.387, "step": 405 }, { "epoch": 4.660194174757281, "grad_norm": 1.6794809103012085, "learning_rate": 8.6e-06, "loss": 0.1327, "step": 420 }, { "epoch": 4.660194174757281, "eval_accuracy": 0.9745849780998216, "eval_f1": 0.862053369516056, "eval_loss": 0.11022669076919556, "eval_precision": 0.8400176289114147, "eval_recall": 0.885276358569438, "eval_runtime": 36.0549, "eval_samples_per_second": 5.519, "eval_steps_per_second": 1.387, "step": 420 }, { "epoch": 4.826629680998613, "grad_norm": 1.8358403444290161, "learning_rate": 8.550000000000001e-06, "loss": 0.1323, "step": 435 }, { "epoch": 4.826629680998613, "eval_accuracy": 0.978207970583464, "eval_f1": 0.8795454545454546, "eval_loss": 0.09974753856658936, "eval_precision": 0.8611481975967957, "eval_recall": 0.8987459359033906, "eval_runtime": 36.3053, "eval_samples_per_second": 5.481, "eval_steps_per_second": 1.377, "step": 435 }, { "epoch": 4.993065187239944, "grad_norm": 2.1321513652801514, "learning_rate": 8.5e-06, "loss": 0.1254, "step": 450 }, { "epoch": 4.993065187239944, "eval_accuracy": 0.9774509273779268, "eval_f1": 0.8727683615819208, "eval_loss": 0.094924695789814, "eval_precision": 0.8499119718309859, "eval_recall": 0.896888063167673, "eval_runtime": 36.229, "eval_samples_per_second": 5.493, "eval_steps_per_second": 1.38, "step": 450 }, { "epoch": 5.159500693481276, "grad_norm": 1.3562971353530884, "learning_rate": 8.45e-06, "loss": 0.0999, "step": 465 }, { "epoch": 5.159500693481276, "eval_accuracy": 0.9797220569945385, "eval_f1": 0.8822055137844612, "eval_loss": 0.08469922095537186, "eval_precision": 0.8658318425760286, "eval_recall": 0.89921040408732, "eval_runtime": 36.2946, "eval_samples_per_second": 5.483, "eval_steps_per_second": 1.378, "step": 465 }, { "epoch": 5.325936199722608, "grad_norm": 1.683296799659729, "learning_rate": 8.400000000000001e-06, "loss": 0.1017, "step": 480 }, { "epoch": 5.325936199722608, "eval_accuracy": 0.981019845346888, "eval_f1": 0.8923777019340159, "eval_loss": 0.08026640117168427, "eval_precision": 0.8746654772524531, "eval_recall": 0.910822108685555, "eval_runtime": 36.4129, "eval_samples_per_second": 5.465, "eval_steps_per_second": 1.373, "step": 480 }, { "epoch": 5.492371705963939, "grad_norm": 2.303062677383423, "learning_rate": 8.35e-06, "loss": 0.091, "step": 495 }, { "epoch": 5.492371705963939, "eval_accuracy": 0.9805872492294382, "eval_f1": 0.8918362680082322, "eval_loss": 0.07959215342998505, "eval_precision": 0.8783783783783784, "eval_recall": 0.9057129586623316, "eval_runtime": 36.4104, "eval_samples_per_second": 5.465, "eval_steps_per_second": 1.373, "step": 495 }, { "epoch": 5.658807212205271, "grad_norm": 3.383983850479126, "learning_rate": 8.3e-06, "loss": 0.0979, "step": 510 }, { "epoch": 5.658807212205271, "eval_accuracy": 0.9773427783485643, "eval_f1": 0.8775045537340619, "eval_loss": 0.09432032704353333, "eval_precision": 0.8606520768200089, "eval_recall": 0.8950301904319554, "eval_runtime": 36.0505, "eval_samples_per_second": 5.52, "eval_steps_per_second": 1.387, "step": 510 }, { "epoch": 5.825242718446602, "grad_norm": 2.1892480850219727, "learning_rate": 8.25e-06, "loss": 0.1024, "step": 525 }, { "epoch": 5.825242718446602, "eval_accuracy": 0.980533174714757, "eval_f1": 0.8882312770316413, "eval_loss": 0.08036847412586212, "eval_precision": 0.8709821428571428, "eval_recall": 0.906177426846261, "eval_runtime": 36.1406, "eval_samples_per_second": 5.506, "eval_steps_per_second": 1.383, "step": 525 }, { "epoch": 5.991678224687933, "grad_norm": 1.8490287065505981, "learning_rate": 8.2e-06, "loss": 0.0952, "step": 540 }, { "epoch": 5.991678224687933, "eval_accuracy": 0.9816146650083816, "eval_f1": 0.900843400957374, "eval_loss": 0.07866356521844864, "eval_precision": 0.8845120859444942, "eval_recall": 0.917789131444496, "eval_runtime": 36.4974, "eval_samples_per_second": 5.452, "eval_steps_per_second": 1.37, "step": 540 }, { "epoch": 6.158113730929265, "grad_norm": 3.0108256340026855, "learning_rate": 8.15e-06, "loss": 0.0742, "step": 555 }, { "epoch": 6.158113730929265, "eval_accuracy": 0.9823176336992375, "eval_f1": 0.9032553874369554, "eval_loss": 0.07755902409553528, "eval_precision": 0.8918062471706655, "eval_recall": 0.9150023223409196, "eval_runtime": 36.3184, "eval_samples_per_second": 5.479, "eval_steps_per_second": 1.377, "step": 555 }, { "epoch": 6.324549237170596, "grad_norm": 2.533155679702759, "learning_rate": 8.1e-06, "loss": 0.0764, "step": 570 }, { "epoch": 6.324549237170596, "eval_accuracy": 0.9837235710809495, "eval_f1": 0.9106813996316758, "eval_loss": 0.07210895419120789, "eval_precision": 0.9027841168416249, "eval_recall": 0.9187180678123549, "eval_runtime": 36.5311, "eval_samples_per_second": 5.447, "eval_steps_per_second": 1.369, "step": 570 }, { "epoch": 6.490984743411928, "grad_norm": 1.943320631980896, "learning_rate": 8.050000000000001e-06, "loss": 0.0813, "step": 585 }, { "epoch": 6.490984743411928, "eval_accuracy": 0.9844265397718055, "eval_f1": 0.914614499424626, "eval_loss": 0.06643209606409073, "eval_precision": 0.906478102189781, "eval_recall": 0.9228982814677195, "eval_runtime": 36.3904, "eval_samples_per_second": 5.468, "eval_steps_per_second": 1.374, "step": 585 }, { "epoch": 6.657420249653259, "grad_norm": 1.322831392288208, "learning_rate": 8.000000000000001e-06, "loss": 0.0791, "step": 600 }, { "epoch": 6.657420249653259, "eval_accuracy": 0.9848050613745741, "eval_f1": 0.9137614678899082, "eval_loss": 0.06415116786956787, "eval_precision": 0.902582691436339, "eval_recall": 0.9252206223873665, "eval_runtime": 36.27, "eval_samples_per_second": 5.487, "eval_steps_per_second": 1.379, "step": 600 }, { "epoch": 6.8238557558945905, "grad_norm": 1.5891202688217163, "learning_rate": 7.950000000000002e-06, "loss": 0.0792, "step": 615 }, { "epoch": 6.8238557558945905, "eval_accuracy": 0.9841020926837182, "eval_f1": 0.9103795153177869, "eval_loss": 0.06728328764438629, "eval_precision": 0.8964430436740207, "eval_recall": 0.924756154203437, "eval_runtime": 35.8847, "eval_samples_per_second": 5.546, "eval_steps_per_second": 1.393, "step": 615 }, { "epoch": 6.990291262135923, "grad_norm": 3.470646858215332, "learning_rate": 7.9e-06, "loss": 0.078, "step": 630 }, { "epoch": 6.990291262135923, "eval_accuracy": 0.9832909749634997, "eval_f1": 0.9078857142857143, "eval_loss": 0.06933122873306274, "eval_precision": 0.8937893789378938, "eval_recall": 0.92243381328379, "eval_runtime": 36.1839, "eval_samples_per_second": 5.5, "eval_steps_per_second": 1.382, "step": 630 }, { "epoch": 7.156726768377254, "grad_norm": 2.4168286323547363, "learning_rate": 7.850000000000001e-06, "loss": 0.0678, "step": 645 }, { "epoch": 7.156726768377254, "eval_accuracy": 0.985237657492024, "eval_f1": 0.92025664527956, "eval_loss": 0.06722652167081833, "eval_precision": 0.9081863410221619, "eval_recall": 0.9326521133302369, "eval_runtime": 36.716, "eval_samples_per_second": 5.42, "eval_steps_per_second": 1.362, "step": 645 }, { "epoch": 7.323162274618586, "grad_norm": 1.048614501953125, "learning_rate": 7.800000000000002e-06, "loss": 0.0685, "step": 660 }, { "epoch": 7.323162274618586, "eval_accuracy": 0.9839939436543557, "eval_f1": 0.9072635906806761, "eval_loss": 0.06548429280519485, "eval_precision": 0.8925842696629214, "eval_recall": 0.92243381328379, "eval_runtime": 36.897, "eval_samples_per_second": 5.393, "eval_steps_per_second": 1.355, "step": 660 }, { "epoch": 7.489597780859917, "grad_norm": 2.5844979286193848, "learning_rate": 7.75e-06, "loss": 0.0555, "step": 675 }, { "epoch": 7.489597780859917, "eval_accuracy": 0.9856161790947926, "eval_f1": 0.9213016385875836, "eval_loss": 0.06148982420563698, "eval_precision": 0.9155963302752294, "eval_recall": 0.927078495123084, "eval_runtime": 36.1847, "eval_samples_per_second": 5.5, "eval_steps_per_second": 1.382, "step": 675 }, { "epoch": 7.656033287101248, "grad_norm": 1.9488413333892822, "learning_rate": 7.7e-06, "loss": 0.07, "step": 690 }, { "epoch": 7.656033287101248, "eval_accuracy": 0.9867517439030985, "eval_f1": 0.927176659774868, "eval_loss": 0.058708589524030685, "eval_precision": 0.9172727272727272, "eval_recall": 0.9372967951695309, "eval_runtime": 36.4405, "eval_samples_per_second": 5.461, "eval_steps_per_second": 1.372, "step": 690 }, { "epoch": 7.82246879334258, "grad_norm": 1.7437242269515991, "learning_rate": 7.650000000000001e-06, "loss": 0.065, "step": 705 }, { "epoch": 7.82246879334258, "eval_accuracy": 0.9874547125939545, "eval_f1": 0.9303928325292902, "eval_loss": 0.0557989701628685, "eval_precision": 0.9204545454545454, "eval_recall": 0.9405480724570366, "eval_runtime": 36.0661, "eval_samples_per_second": 5.518, "eval_steps_per_second": 1.386, "step": 705 }, { "epoch": 7.988904299583911, "grad_norm": 1.0527422428131104, "learning_rate": 7.600000000000001e-06, "loss": 0.0599, "step": 720 }, { "epoch": 7.988904299583911, "eval_accuracy": 0.9878332341967231, "eval_f1": 0.9342226310947562, "eval_loss": 0.05789622664451599, "eval_precision": 0.9252847380410023, "eval_recall": 0.9433348815606131, "eval_runtime": 36.3712, "eval_samples_per_second": 5.471, "eval_steps_per_second": 1.375, "step": 720 }, { "epoch": 8.155339805825243, "grad_norm": 1.6904972791671753, "learning_rate": 7.5500000000000006e-06, "loss": 0.0571, "step": 735 }, { "epoch": 8.155339805825243, "eval_accuracy": 0.9865895203590548, "eval_f1": 0.9238905495516211, "eval_loss": 0.059290919452905655, "eval_precision": 0.9148451730418944, "eval_recall": 0.9331165815141663, "eval_runtime": 36.0084, "eval_samples_per_second": 5.526, "eval_steps_per_second": 1.389, "step": 735 }, { "epoch": 8.321775312066574, "grad_norm": 1.9831328392028809, "learning_rate": 7.500000000000001e-06, "loss": 0.0563, "step": 750 }, { "epoch": 8.321775312066574, "eval_accuracy": 0.9863191477856487, "eval_f1": 0.9236079153244362, "eval_loss": 0.06046581640839577, "eval_precision": 0.9151846785225718, "eval_recall": 0.9321876451463075, "eval_runtime": 36.0693, "eval_samples_per_second": 5.517, "eval_steps_per_second": 1.386, "step": 750 }, { "epoch": 8.488210818307905, "grad_norm": 2.0379467010498047, "learning_rate": 7.450000000000001e-06, "loss": 0.0602, "step": 765 }, { "epoch": 8.488210818307905, "eval_accuracy": 0.9863191477856487, "eval_f1": 0.927992590877518, "eval_loss": 0.058113399893045425, "eval_precision": 0.925207756232687, "eval_recall": 0.9307942405945193, "eval_runtime": 35.9178, "eval_samples_per_second": 5.54, "eval_steps_per_second": 1.392, "step": 765 }, { "epoch": 8.654646324549237, "grad_norm": 3.095200538635254, "learning_rate": 7.4e-06, "loss": 0.0582, "step": 780 }, { "epoch": 8.654646324549237, "eval_accuracy": 0.9872384145352295, "eval_f1": 0.9288837744533948, "eval_loss": 0.05814095214009285, "eval_precision": 0.9206204379562044, "eval_recall": 0.9372967951695309, "eval_runtime": 36.2273, "eval_samples_per_second": 5.493, "eval_steps_per_second": 1.38, "step": 780 }, { "epoch": 8.821081830790568, "grad_norm": 1.0786473751068115, "learning_rate": 7.350000000000001e-06, "loss": 0.0514, "step": 795 }, { "epoch": 8.821081830790568, "eval_accuracy": 0.9872924890499107, "eval_f1": 0.9313047487321346, "eval_loss": 0.055727362632751465, "eval_precision": 0.9244851258581236, "eval_recall": 0.9382257315373896, "eval_runtime": 36.0241, "eval_samples_per_second": 5.524, "eval_steps_per_second": 1.388, "step": 795 }, { "epoch": 8.9875173370319, "grad_norm": 1.6077920198440552, "learning_rate": 7.3e-06, "loss": 0.0467, "step": 810 }, { "epoch": 8.9875173370319, "eval_accuracy": 0.9883199048288541, "eval_f1": 0.9393661001378043, "eval_loss": 0.05200658738613129, "eval_precision": 0.9291231258518855, "eval_recall": 0.9498374361356247, "eval_runtime": 35.9411, "eval_samples_per_second": 5.537, "eval_steps_per_second": 1.391, "step": 810 }, { "epoch": 9.153952843273231, "grad_norm": 1.601219892501831, "learning_rate": 7.25e-06, "loss": 0.0435, "step": 825 }, { "epoch": 9.153952843273231, "eval_accuracy": 0.9879954577407668, "eval_f1": 0.9336699563920129, "eval_loss": 0.05260741710662842, "eval_precision": 0.9228675136116152, "eval_recall": 0.9447282861124013, "eval_runtime": 35.7996, "eval_samples_per_second": 5.559, "eval_steps_per_second": 1.397, "step": 825 }, { "epoch": 9.320388349514563, "grad_norm": 0.7272451519966125, "learning_rate": 7.2000000000000005e-06, "loss": 0.0531, "step": 840 }, { "epoch": 9.320388349514563, "eval_accuracy": 0.9883739793435354, "eval_f1": 0.9344978165938865, "eval_loss": 0.05022520199418068, "eval_precision": 0.9249317561419472, "eval_recall": 0.9442638179284719, "eval_runtime": 36.0285, "eval_samples_per_second": 5.523, "eval_steps_per_second": 1.388, "step": 840 }, { "epoch": 9.486823855755894, "grad_norm": 0.9556881189346313, "learning_rate": 7.15e-06, "loss": 0.0502, "step": 855 }, { "epoch": 9.486823855755894, "eval_accuracy": 0.9874006380792733, "eval_f1": 0.9309240622140896, "eval_loss": 0.05446859449148178, "eval_precision": 0.9170797656602073, "eval_recall": 0.9451927542963307, "eval_runtime": 36.0609, "eval_samples_per_second": 5.518, "eval_steps_per_second": 1.387, "step": 855 }, { "epoch": 9.653259361997225, "grad_norm": 1.0404924154281616, "learning_rate": 7.100000000000001e-06, "loss": 0.0377, "step": 870 }, { "epoch": 9.653259361997225, "eval_accuracy": 0.9850754339479804, "eval_f1": 0.9220571428571429, "eval_loss": 0.06175297126173973, "eval_precision": 0.9077407740774077, "eval_recall": 0.9368323269856015, "eval_runtime": 36.326, "eval_samples_per_second": 5.478, "eval_steps_per_second": 1.376, "step": 870 }, { "epoch": 9.819694868238557, "grad_norm": 1.1249316930770874, "learning_rate": 7.05e-06, "loss": 0.0416, "step": 885 }, { "epoch": 9.819694868238557, "eval_accuracy": 0.9881036067701292, "eval_f1": 0.9328719723183392, "eval_loss": 0.05493583530187607, "eval_precision": 0.9266727772685609, "eval_recall": 0.9391546679052485, "eval_runtime": 36.1852, "eval_samples_per_second": 5.499, "eval_steps_per_second": 1.382, "step": 885 }, { "epoch": 9.986130374479888, "grad_norm": 1.0846829414367676, "learning_rate": 7e-06, "loss": 0.044, "step": 900 }, { "epoch": 9.986130374479888, "eval_accuracy": 0.9884280538582166, "eval_f1": 0.9420457169244978, "eval_loss": 0.05289188027381897, "eval_precision": 0.9366391184573003, "eval_recall": 0.9475150952159777, "eval_runtime": 36.0505, "eval_samples_per_second": 5.52, "eval_steps_per_second": 1.387, "step": 900 }, { "epoch": 10.152565880721221, "grad_norm": 0.8957504630088806, "learning_rate": 6.95e-06, "loss": 0.0383, "step": 915 }, { "epoch": 10.152565880721221, "eval_accuracy": 0.9889147244903477, "eval_f1": 0.9403088269186448, "eval_loss": 0.048978183418512344, "eval_precision": 0.9332113449222323, "eval_recall": 0.9475150952159777, "eval_runtime": 36.1551, "eval_samples_per_second": 5.504, "eval_steps_per_second": 1.383, "step": 915 }, { "epoch": 10.319001386962553, "grad_norm": 1.6940028667449951, "learning_rate": 6.9e-06, "loss": 0.0454, "step": 930 }, { "epoch": 10.319001386962553, "eval_accuracy": 0.988536202887579, "eval_f1": 0.9366100137804317, "eval_loss": 0.05073446407914162, "eval_precision": 0.9263970922308041, "eval_recall": 0.9470506270320483, "eval_runtime": 36.1642, "eval_samples_per_second": 5.503, "eval_steps_per_second": 1.383, "step": 930 }, { "epoch": 10.485436893203884, "grad_norm": 0.9225968718528748, "learning_rate": 6.850000000000001e-06, "loss": 0.0416, "step": 945 }, { "epoch": 10.485436893203884, "eval_accuracy": 0.9891310225490726, "eval_f1": 0.9430481899930827, "eval_loss": 0.046711865812540054, "eval_precision": 0.9363553113553114, "eval_recall": 0.9498374361356247, "eval_runtime": 36.5741, "eval_samples_per_second": 5.441, "eval_steps_per_second": 1.367, "step": 945 }, { "epoch": 10.651872399445216, "grad_norm": 2.7210068702697754, "learning_rate": 6.800000000000001e-06, "loss": 0.0403, "step": 960 }, { "epoch": 10.651872399445216, "eval_accuracy": 0.9886443519169416, "eval_f1": 0.9384650841207652, "eval_loss": 0.04987097531557083, "eval_precision": 0.9313815187557182, "eval_recall": 0.9456572224802601, "eval_runtime": 36.5567, "eval_samples_per_second": 5.444, "eval_steps_per_second": 1.368, "step": 960 }, { "epoch": 10.818307905686547, "grad_norm": 1.160333275794983, "learning_rate": 6.750000000000001e-06, "loss": 0.0354, "step": 975 }, { "epoch": 10.818307905686547, "eval_accuracy": 0.9882658303141729, "eval_f1": 0.9354171454837968, "eval_loss": 0.05233873799443245, "eval_precision": 0.9258416742493175, "eval_recall": 0.9451927542963307, "eval_runtime": 36.44, "eval_samples_per_second": 5.461, "eval_steps_per_second": 1.372, "step": 975 }, { "epoch": 10.984743411927878, "grad_norm": 0.8807191848754883, "learning_rate": 6.700000000000001e-06, "loss": 0.0338, "step": 990 }, { "epoch": 10.984743411927878, "eval_accuracy": 0.9879954577407668, "eval_f1": 0.9318025258323767, "eval_loss": 0.052071038633584976, "eval_precision": 0.9214350590372389, "eval_recall": 0.9424059451927543, "eval_runtime": 36.6322, "eval_samples_per_second": 5.432, "eval_steps_per_second": 1.365, "step": 990 }, { "epoch": 11.15117891816921, "grad_norm": 1.1557176113128662, "learning_rate": 6.650000000000001e-06, "loss": 0.0347, "step": 1005 }, { "epoch": 11.15117891816921, "eval_accuracy": 0.988049532255448, "eval_f1": 0.9353507565337001, "eval_loss": 0.053912434726953506, "eval_precision": 0.9234947940244455, "eval_recall": 0.9475150952159777, "eval_runtime": 36.5986, "eval_samples_per_second": 5.437, "eval_steps_per_second": 1.366, "step": 1005 }, { "epoch": 11.317614424410541, "grad_norm": 1.668484091758728, "learning_rate": 6.600000000000001e-06, "loss": 0.0364, "step": 1020 }, { "epoch": 11.317614424410541, "eval_accuracy": 0.9870761909911858, "eval_f1": 0.9334552938486165, "eval_loss": 0.055973075330257416, "eval_precision": 0.9193693693693694, "eval_recall": 0.9479795633999071, "eval_runtime": 36.6625, "eval_samples_per_second": 5.428, "eval_steps_per_second": 1.364, "step": 1020 }, { "epoch": 11.484049930651873, "grad_norm": 2.5720293521881104, "learning_rate": 6.550000000000001e-06, "loss": 0.0363, "step": 1035 }, { "epoch": 11.484049930651873, "eval_accuracy": 0.9889147244903477, "eval_f1": 0.9381751321535279, "eval_loss": 0.050925422459840775, "eval_precision": 0.9285714285714286, "eval_recall": 0.9479795633999071, "eval_runtime": 36.4069, "eval_samples_per_second": 5.466, "eval_steps_per_second": 1.373, "step": 1035 }, { "epoch": 11.650485436893204, "grad_norm": 2.5676207542419434, "learning_rate": 6.5000000000000004e-06, "loss": 0.0308, "step": 1050 }, { "epoch": 11.650485436893204, "eval_accuracy": 0.9893473206077975, "eval_f1": 0.94362292051756, "eval_loss": 0.04982053115963936, "eval_precision": 0.9388505747126437, "eval_recall": 0.9484440315838365, "eval_runtime": 36.3679, "eval_samples_per_second": 5.472, "eval_steps_per_second": 1.375, "step": 1050 }, { "epoch": 11.816920943134535, "grad_norm": 0.9586185812950134, "learning_rate": 6.450000000000001e-06, "loss": 0.032, "step": 1065 }, { "epoch": 11.816920943134535, "eval_accuracy": 0.9891310225490726, "eval_f1": 0.9403330249768733, "eval_loss": 0.04908496141433716, "eval_precision": 0.9364348226623675, "eval_recall": 0.9442638179284719, "eval_runtime": 35.9979, "eval_samples_per_second": 5.528, "eval_steps_per_second": 1.389, "step": 1065 }, { "epoch": 11.983356449375867, "grad_norm": 1.067063331604004, "learning_rate": 6.4000000000000006e-06, "loss": 0.0331, "step": 1080 }, { "epoch": 11.983356449375867, "eval_accuracy": 0.9891850970637539, "eval_f1": 0.940768162887552, "eval_loss": 0.0454898327589035, "eval_precision": 0.9372982941447672, "eval_recall": 0.9442638179284719, "eval_runtime": 36.1674, "eval_samples_per_second": 5.502, "eval_steps_per_second": 1.382, "step": 1080 }, { "epoch": 12.149791955617198, "grad_norm": 1.4905815124511719, "learning_rate": 6.35e-06, "loss": 0.0301, "step": 1095 }, { "epoch": 12.149791955617198, "eval_accuracy": 0.9891850970637539, "eval_f1": 0.9423431734317342, "eval_loss": 0.04859260097146034, "eval_precision": 0.9358680714612918, "eval_recall": 0.9489084997677659, "eval_runtime": 36.286, "eval_samples_per_second": 5.484, "eval_steps_per_second": 1.378, "step": 1095 }, { "epoch": 12.31622746185853, "grad_norm": 1.3888496160507202, "learning_rate": 6.300000000000001e-06, "loss": 0.0308, "step": 1110 }, { "epoch": 12.31622746185853, "eval_accuracy": 0.9891310225490726, "eval_f1": 0.9413388543823326, "eval_loss": 0.051349248737096786, "eval_precision": 0.9325432999088423, "eval_recall": 0.9503019043195541, "eval_runtime": 36.2143, "eval_samples_per_second": 5.495, "eval_steps_per_second": 1.381, "step": 1110 }, { "epoch": 12.482662968099861, "grad_norm": 0.5457278490066528, "learning_rate": 6.25e-06, "loss": 0.0253, "step": 1125 }, { "epoch": 12.482662968099861, "eval_accuracy": 0.9891850970637539, "eval_f1": 0.939825447864033, "eval_loss": 0.05103699862957001, "eval_precision": 0.9295774647887324, "eval_recall": 0.9503019043195541, "eval_runtime": 36.4491, "eval_samples_per_second": 5.46, "eval_steps_per_second": 1.372, "step": 1125 }, { "epoch": 12.649098474341192, "grad_norm": 1.106314778327942, "learning_rate": 6.200000000000001e-06, "loss": 0.0301, "step": 1140 }, { "epoch": 12.649098474341192, "eval_accuracy": 0.9886443519169416, "eval_f1": 0.9397424103035878, "eval_loss": 0.053277622908353806, "eval_precision": 0.9307517084282461, "eval_recall": 0.9489084997677659, "eval_runtime": 36.4299, "eval_samples_per_second": 5.463, "eval_steps_per_second": 1.372, "step": 1140 }, { "epoch": 12.815533980582524, "grad_norm": 0.9172839522361755, "learning_rate": 6.15e-06, "loss": 0.0328, "step": 1155 }, { "epoch": 12.815533980582524, "eval_accuracy": 0.9884821283728978, "eval_f1": 0.9364348226623675, "eval_loss": 0.0548846460878849, "eval_precision": 0.9287345820009136, "eval_recall": 0.9442638179284719, "eval_runtime": 36.3929, "eval_samples_per_second": 5.468, "eval_steps_per_second": 1.374, "step": 1155 }, { "epoch": 12.981969486823855, "grad_norm": 1.9091347455978394, "learning_rate": 6.1e-06, "loss": 0.0298, "step": 1170 }, { "epoch": 12.981969486823855, "eval_accuracy": 0.98945546963716, "eval_f1": 0.9450092421441775, "eval_loss": 0.05042650178074837, "eval_precision": 0.9402298850574713, "eval_recall": 0.9498374361356247, "eval_runtime": 35.8371, "eval_samples_per_second": 5.553, "eval_steps_per_second": 1.395, "step": 1170 }, { "epoch": 13.148404993065187, "grad_norm": 1.2674860954284668, "learning_rate": 6.0500000000000005e-06, "loss": 0.0256, "step": 1185 }, { "epoch": 13.148404993065187, "eval_accuracy": 0.988752500946304, "eval_f1": 0.9386716037954178, "eval_loss": 0.051467474550008774, "eval_precision": 0.9354243542435424, "eval_recall": 0.9419414770088249, "eval_runtime": 36.0333, "eval_samples_per_second": 5.523, "eval_steps_per_second": 1.388, "step": 1185 }, { "epoch": 13.314840499306518, "grad_norm": 1.406807780265808, "learning_rate": 6e-06, "loss": 0.0313, "step": 1200 }, { "epoch": 13.314840499306518, "eval_accuracy": 0.9905369599307846, "eval_f1": 0.9480968858131489, "eval_loss": 0.048274096101522446, "eval_precision": 0.9417965169569202, "eval_recall": 0.9544821179749187, "eval_runtime": 35.8422, "eval_samples_per_second": 5.552, "eval_steps_per_second": 1.395, "step": 1200 }, { "epoch": 13.48127600554785, "grad_norm": 0.5426374673843384, "learning_rate": 5.950000000000001e-06, "loss": 0.022, "step": 1215 }, { "epoch": 13.48127600554785, "eval_accuracy": 0.9898880657546099, "eval_f1": 0.9445339470655927, "eval_loss": 0.0463298000395298, "eval_precision": 0.9361313868613139, "eval_recall": 0.9530887134231305, "eval_runtime": 36.2558, "eval_samples_per_second": 5.489, "eval_steps_per_second": 1.379, "step": 1215 }, { "epoch": 13.647711511789181, "grad_norm": 2.050182342529297, "learning_rate": 5.9e-06, "loss": 0.0245, "step": 1230 }, { "epoch": 13.647711511789181, "eval_accuracy": 0.9893473206077975, "eval_f1": 0.9430219146482123, "eval_loss": 0.04942420497536659, "eval_precision": 0.9367552703941339, "eval_recall": 0.9493729679516953, "eval_runtime": 36.4711, "eval_samples_per_second": 5.456, "eval_steps_per_second": 1.371, "step": 1230 }, { "epoch": 13.814147018030512, "grad_norm": 1.7617555856704712, "learning_rate": 5.85e-06, "loss": 0.0251, "step": 1245 }, { "epoch": 13.814147018030512, "eval_accuracy": 0.9897799167252473, "eval_f1": 0.9467128027681662, "eval_loss": 0.049306854605674744, "eval_precision": 0.9404216315307058, "eval_recall": 0.9530887134231305, "eval_runtime": 36.1814, "eval_samples_per_second": 5.5, "eval_steps_per_second": 1.382, "step": 1245 }, { "epoch": 13.980582524271846, "grad_norm": 1.183014154434204, "learning_rate": 5.8e-06, "loss": 0.0259, "step": 1260 }, { "epoch": 13.980582524271846, "eval_accuracy": 0.98945546963716, "eval_f1": 0.9453539312889093, "eval_loss": 0.05114530399441719, "eval_precision": 0.9386446886446886, "eval_recall": 0.9521597770552717, "eval_runtime": 36.0831, "eval_samples_per_second": 5.515, "eval_steps_per_second": 1.386, "step": 1260 }, { "epoch": 14.147018030513177, "grad_norm": 0.6956959962844849, "learning_rate": 5.75e-06, "loss": 0.03, "step": 1275 }, { "epoch": 14.147018030513177, "eval_accuracy": 0.9888606499756665, "eval_f1": 0.9399815327793166, "eval_loss": 0.053482603281736374, "eval_precision": 0.9343735658558971, "eval_recall": 0.9456572224802601, "eval_runtime": 35.8745, "eval_samples_per_second": 5.547, "eval_steps_per_second": 1.394, "step": 1275 }, { "epoch": 14.313453536754508, "grad_norm": 1.2064058780670166, "learning_rate": 5.7e-06, "loss": 0.0192, "step": 1290 }, { "epoch": 14.313453536754508, "eval_accuracy": 0.9898880657546099, "eval_f1": 0.9460772969220087, "eval_loss": 0.049094799906015396, "eval_precision": 0.9428044280442804, "eval_recall": 0.9493729679516953, "eval_runtime": 35.7923, "eval_samples_per_second": 5.56, "eval_steps_per_second": 1.397, "step": 1290 }, { "epoch": 14.47988904299584, "grad_norm": 1.727489948272705, "learning_rate": 5.65e-06, "loss": 0.0267, "step": 1305 }, { "epoch": 14.47988904299584, "eval_accuracy": 0.9901043638133348, "eval_f1": 0.9500693481276006, "eval_loss": 0.04895344376564026, "eval_precision": 0.9456971928209849, "eval_recall": 0.9544821179749187, "eval_runtime": 36.349, "eval_samples_per_second": 5.475, "eval_steps_per_second": 1.376, "step": 1305 }, { "epoch": 14.646324549237171, "grad_norm": 0.6142871379852295, "learning_rate": 5.600000000000001e-06, "loss": 0.0241, "step": 1320 }, { "epoch": 14.646324549237171, "eval_accuracy": 0.9899421402692911, "eval_f1": 0.948729792147806, "eval_loss": 0.050602879375219345, "eval_precision": 0.9435002296738632, "eval_recall": 0.9540176497909894, "eval_runtime": 36.205, "eval_samples_per_second": 5.496, "eval_steps_per_second": 1.381, "step": 1320 }, { "epoch": 14.812760055478503, "grad_norm": 1.6362483501434326, "learning_rate": 5.550000000000001e-06, "loss": 0.0211, "step": 1335 }, { "epoch": 14.812760055478503, "eval_accuracy": 0.9903206618720597, "eval_f1": 0.9491682070240296, "eval_loss": 0.050954435020685196, "eval_precision": 0.944367816091954, "eval_recall": 0.9540176497909894, "eval_runtime": 36.469, "eval_samples_per_second": 5.457, "eval_steps_per_second": 1.371, "step": 1335 }, { "epoch": 14.979195561719834, "grad_norm": 0.9267581105232239, "learning_rate": 5.500000000000001e-06, "loss": 0.0171, "step": 1350 }, { "epoch": 14.979195561719834, "eval_accuracy": 0.9897799167252473, "eval_f1": 0.9474412171507607, "eval_loss": 0.04994847625494003, "eval_precision": 0.9405034324942791, "eval_recall": 0.9544821179749187, "eval_runtime": 36.7159, "eval_samples_per_second": 5.42, "eval_steps_per_second": 1.362, "step": 1350 }, { "epoch": 15.145631067961165, "grad_norm": 0.6142176389694214, "learning_rate": 5.450000000000001e-06, "loss": 0.0226, "step": 1365 }, { "epoch": 15.145631067961165, "eval_accuracy": 0.9894013951224788, "eval_f1": 0.9452369995398067, "eval_loss": 0.05113999918103218, "eval_precision": 0.9366165070679434, "eval_recall": 0.9540176497909894, "eval_runtime": 36.247, "eval_samples_per_second": 5.49, "eval_steps_per_second": 1.379, "step": 1365 }, { "epoch": 15.312066574202497, "grad_norm": 0.46341672539711, "learning_rate": 5.400000000000001e-06, "loss": 0.024, "step": 1380 }, { "epoch": 15.312066574202497, "eval_accuracy": 0.9899421402692911, "eval_f1": 0.9501385041551247, "eval_loss": 0.04835886508226395, "eval_precision": 0.9444699403396053, "eval_recall": 0.9558755225267069, "eval_runtime": 35.8678, "eval_samples_per_second": 5.548, "eval_steps_per_second": 1.394, "step": 1380 }, { "epoch": 15.478502080443828, "grad_norm": 1.446049690246582, "learning_rate": 5.3500000000000004e-06, "loss": 0.018, "step": 1395 }, { "epoch": 15.478502080443828, "eval_accuracy": 0.9903206618720597, "eval_f1": 0.9492703266157054, "eval_loss": 0.04823274910449982, "eval_precision": 0.9468576709796673, "eval_recall": 0.9516953088713423, "eval_runtime": 35.9765, "eval_samples_per_second": 5.531, "eval_steps_per_second": 1.39, "step": 1395 }, { "epoch": 15.64493758668516, "grad_norm": 0.7485630512237549, "learning_rate": 5.300000000000001e-06, "loss": 0.0191, "step": 1410 }, { "epoch": 15.64493758668516, "eval_accuracy": 0.9899421402692911, "eval_f1": 0.947709393799167, "eval_loss": 0.04913439229130745, "eval_precision": 0.9442139234670355, "eval_recall": 0.9512308406874129, "eval_runtime": 36.5589, "eval_samples_per_second": 5.443, "eval_steps_per_second": 1.368, "step": 1410 }, { "epoch": 15.811373092926491, "grad_norm": 0.8376514911651611, "learning_rate": 5.2500000000000006e-06, "loss": 0.0203, "step": 1425 }, { "epoch": 15.811373092926491, "eval_accuracy": 0.9912399286216407, "eval_f1": 0.9531974050046339, "eval_loss": 0.04510456323623657, "eval_precision": 0.9509939898289412, "eval_recall": 0.9554110543427775, "eval_runtime": 36.8157, "eval_samples_per_second": 5.405, "eval_steps_per_second": 1.358, "step": 1425 }, { "epoch": 15.977808599167822, "grad_norm": 1.1797449588775635, "learning_rate": 5.2e-06, "loss": 0.0198, "step": 1440 }, { "epoch": 15.977808599167822, "eval_accuracy": 0.9911317795922782, "eval_f1": 0.952292728114868, "eval_loss": 0.04465332254767418, "eval_precision": 0.9496535796766744, "eval_recall": 0.9549465861588481, "eval_runtime": 36.3506, "eval_samples_per_second": 5.474, "eval_steps_per_second": 1.375, "step": 1440 }, { "epoch": 16.144244105409154, "grad_norm": 2.32300066947937, "learning_rate": 5.150000000000001e-06, "loss": 0.0167, "step": 1455 }, { "epoch": 16.144244105409154, "eval_accuracy": 0.9909154815335532, "eval_f1": 0.9513663733209818, "eval_loss": 0.044419851154088974, "eval_precision": 0.948729792147806, "eval_recall": 0.9540176497909894, "eval_runtime": 36.4511, "eval_samples_per_second": 5.459, "eval_steps_per_second": 1.372, "step": 1455 }, { "epoch": 16.310679611650485, "grad_norm": 1.4079307317733765, "learning_rate": 5.1e-06, "loss": 0.0178, "step": 1470 }, { "epoch": 16.310679611650485, "eval_accuracy": 0.9891850970637539, "eval_f1": 0.9448673587081892, "eval_loss": 0.05134458467364311, "eval_precision": 0.9385884509624198, "eval_recall": 0.9512308406874129, "eval_runtime": 35.9882, "eval_samples_per_second": 5.53, "eval_steps_per_second": 1.389, "step": 1470 }, { "epoch": 16.477115117891817, "grad_norm": 1.1276496648788452, "learning_rate": 5.050000000000001e-06, "loss": 0.024, "step": 1485 }, { "epoch": 16.477115117891817, "eval_accuracy": 0.9899421402692911, "eval_f1": 0.9482678983833718, "eval_loss": 0.0502447672188282, "eval_precision": 0.9430408819476344, "eval_recall": 0.9535531816070599, "eval_runtime": 36.2001, "eval_samples_per_second": 5.497, "eval_steps_per_second": 1.381, "step": 1485 }, { "epoch": 16.643550624133148, "grad_norm": 1.1420115232467651, "learning_rate": 5e-06, "loss": 0.0206, "step": 1500 }, { "epoch": 16.643550624133148, "eval_accuracy": 0.9907532579895095, "eval_f1": 0.9513888888888888, "eval_loss": 0.045851416885852814, "eval_precision": 0.9483156437471159, "eval_recall": 0.9544821179749187, "eval_runtime": 36.0375, "eval_samples_per_second": 5.522, "eval_steps_per_second": 1.387, "step": 1500 }, { "epoch": 16.80998613037448, "grad_norm": 0.6803048849105835, "learning_rate": 4.95e-06, "loss": 0.0188, "step": 1515 }, { "epoch": 16.80998613037448, "eval_accuracy": 0.9906451089601471, "eval_f1": 0.9507058551261283, "eval_loss": 0.04693201929330826, "eval_precision": 0.9474169741697417, "eval_recall": 0.9540176497909894, "eval_runtime": 36.4292, "eval_samples_per_second": 5.463, "eval_steps_per_second": 1.373, "step": 1515 }, { "epoch": 16.97642163661581, "grad_norm": 0.6494084000587463, "learning_rate": 4.9000000000000005e-06, "loss": 0.016, "step": 1530 }, { "epoch": 16.97642163661581, "eval_accuracy": 0.9905910344454658, "eval_f1": 0.9524469067405354, "eval_loss": 0.04632224142551422, "eval_precision": 0.9467645709040844, "eval_recall": 0.9581978634463539, "eval_runtime": 36.8269, "eval_samples_per_second": 5.404, "eval_steps_per_second": 1.358, "step": 1530 }, { "epoch": 17.142857142857142, "grad_norm": 0.9313808083534241, "learning_rate": 4.85e-06, "loss": 0.0161, "step": 1545 }, { "epoch": 17.142857142857142, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9555966697502312, "eval_loss": 0.045460253953933716, "eval_precision": 0.9516351911561493, "eval_recall": 0.9595912679981421, "eval_runtime": 36.5267, "eval_samples_per_second": 5.448, "eval_steps_per_second": 1.369, "step": 1545 }, { "epoch": 17.309292649098474, "grad_norm": 0.6977990865707397, "learning_rate": 4.800000000000001e-06, "loss": 0.0135, "step": 1560 }, { "epoch": 17.309292649098474, "eval_accuracy": 0.9909154815335532, "eval_f1": 0.9548297428769978, "eval_loss": 0.04745380952954292, "eval_precision": 0.9524029574861368, "eval_recall": 0.9572689270784951, "eval_runtime": 36.396, "eval_samples_per_second": 5.468, "eval_steps_per_second": 1.374, "step": 1560 }, { "epoch": 17.475728155339805, "grad_norm": 0.7467624545097351, "learning_rate": 4.75e-06, "loss": 0.0148, "step": 1575 }, { "epoch": 17.475728155339805, "eval_accuracy": 0.9904828854161034, "eval_f1": 0.9491916859122401, "eval_loss": 0.047850631177425385, "eval_precision": 0.9439595774000918, "eval_recall": 0.9544821179749187, "eval_runtime": 36.2126, "eval_samples_per_second": 5.495, "eval_steps_per_second": 1.381, "step": 1575 }, { "epoch": 17.642163661581137, "grad_norm": 0.7804221510887146, "learning_rate": 4.7e-06, "loss": 0.0173, "step": 1590 }, { "epoch": 17.642163661581137, "eval_accuracy": 0.9915103011950468, "eval_f1": 0.9571858366118954, "eval_loss": 0.04551170393824577, "eval_precision": 0.9538745387453874, "eval_recall": 0.9605202043660009, "eval_runtime": 36.006, "eval_samples_per_second": 5.527, "eval_steps_per_second": 1.389, "step": 1590 }, { "epoch": 17.808599167822468, "grad_norm": 1.0907295942306519, "learning_rate": 4.65e-06, "loss": 0.0173, "step": 1605 }, { "epoch": 17.808599167822468, "eval_accuracy": 0.9913480776510031, "eval_f1": 0.9514338575393155, "eval_loss": 0.04557771980762482, "eval_precision": 0.9474896361123906, "eval_recall": 0.9554110543427775, "eval_runtime": 36.2064, "eval_samples_per_second": 5.496, "eval_steps_per_second": 1.381, "step": 1605 }, { "epoch": 17.9750346740638, "grad_norm": 1.295432209968567, "learning_rate": 4.600000000000001e-06, "loss": 0.0185, "step": 1620 }, { "epoch": 17.9750346740638, "eval_accuracy": 0.9907532579895095, "eval_f1": 0.9537465309898243, "eval_loss": 0.04614636301994324, "eval_precision": 0.9497927222478121, "eval_recall": 0.9577333952624245, "eval_runtime": 36.269, "eval_samples_per_second": 5.487, "eval_steps_per_second": 1.379, "step": 1620 }, { "epoch": 18.14147018030513, "grad_norm": 1.0728676319122314, "learning_rate": 4.5500000000000005e-06, "loss": 0.0153, "step": 1635 }, { "epoch": 18.14147018030513, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9547553093259464, "eval_loss": 0.04719853028655052, "eval_precision": 0.9490592014685636, "eval_recall": 0.9605202043660009, "eval_runtime": 36.7621, "eval_samples_per_second": 5.413, "eval_steps_per_second": 1.36, "step": 1635 }, { "epoch": 18.307905686546462, "grad_norm": 0.848417341709137, "learning_rate": 4.5e-06, "loss": 0.0148, "step": 1650 }, { "epoch": 18.307905686546462, "eval_accuracy": 0.9913480776510031, "eval_f1": 0.9546716003700277, "eval_loss": 0.04460978880524635, "eval_precision": 0.9507139567019807, "eval_recall": 0.9586623316302834, "eval_runtime": 36.6036, "eval_samples_per_second": 5.437, "eval_steps_per_second": 1.366, "step": 1650 }, { "epoch": 18.474341192787794, "grad_norm": 0.8914014101028442, "learning_rate": 4.450000000000001e-06, "loss": 0.0136, "step": 1665 }, { "epoch": 18.474341192787794, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9542936288088641, "eval_loss": 0.044093821197748184, "eval_precision": 0.9486002753556677, "eval_recall": 0.9600557361820715, "eval_runtime": 36.4626, "eval_samples_per_second": 5.458, "eval_steps_per_second": 1.371, "step": 1665 }, { "epoch": 18.640776699029125, "grad_norm": 1.768336534500122, "learning_rate": 4.4e-06, "loss": 0.0185, "step": 1680 }, { "epoch": 18.640776699029125, "eval_accuracy": 0.9914562266803656, "eval_f1": 0.9550509731232623, "eval_loss": 0.047818973660469055, "eval_precision": 0.9528432732316228, "eval_recall": 0.9572689270784951, "eval_runtime": 35.9606, "eval_samples_per_second": 5.534, "eval_steps_per_second": 1.39, "step": 1680 }, { "epoch": 18.807212205270456, "grad_norm": 0.8891735672950745, "learning_rate": 4.353333333333334e-06, "loss": 0.0147, "step": 1695 }, { "epoch": 18.807212205270456, "eval_accuracy": 0.9911858541069594, "eval_f1": 0.9582660825455385, "eval_loss": 0.04927229881286621, "eval_precision": 0.9514652014652014, "eval_recall": 0.965164886205295, "eval_runtime": 36.0431, "eval_samples_per_second": 5.521, "eval_steps_per_second": 1.387, "step": 1695 }, { "epoch": 18.973647711511788, "grad_norm": 0.860618531703949, "learning_rate": 4.303333333333334e-06, "loss": 0.0156, "step": 1710 }, { "epoch": 18.973647711511788, "eval_accuracy": 0.9902665873573785, "eval_f1": 0.9491916859122401, "eval_loss": 0.05092372000217438, "eval_precision": 0.9439595774000918, "eval_recall": 0.9544821179749187, "eval_runtime": 36.549, "eval_samples_per_second": 5.445, "eval_steps_per_second": 1.368, "step": 1710 }, { "epoch": 19.14008321775312, "grad_norm": 0.4298454821109772, "learning_rate": 4.253333333333334e-06, "loss": 0.0113, "step": 1725 }, { "epoch": 19.14008321775312, "eval_accuracy": 0.9911317795922782, "eval_f1": 0.9566024599675098, "eval_loss": 0.046022918075323105, "eval_precision": 0.9559369202226345, "eval_recall": 0.9572689270784951, "eval_runtime": 36.802, "eval_samples_per_second": 5.407, "eval_steps_per_second": 1.359, "step": 1725 }, { "epoch": 19.30651872399445, "grad_norm": 0.7119155526161194, "learning_rate": 4.2033333333333335e-06, "loss": 0.014, "step": 1740 }, { "epoch": 19.30651872399445, "eval_accuracy": 0.9904828854161034, "eval_f1": 0.948220064724919, "eval_loss": 0.04928451031446457, "eval_precision": 0.9438564196962724, "eval_recall": 0.9526242452392011, "eval_runtime": 36.4604, "eval_samples_per_second": 5.458, "eval_steps_per_second": 1.371, "step": 1740 }, { "epoch": 19.472954230235782, "grad_norm": 0.6270649433135986, "learning_rate": 4.153333333333334e-06, "loss": 0.0147, "step": 1755 }, { "epoch": 19.472954230235782, "eval_accuracy": 0.9906451089601471, "eval_f1": 0.9521608504737693, "eval_loss": 0.04984944686293602, "eval_precision": 0.9475620975160993, "eval_recall": 0.9568044588945657, "eval_runtime": 36.1824, "eval_samples_per_second": 5.5, "eval_steps_per_second": 1.382, "step": 1755 }, { "epoch": 19.639389736477114, "grad_norm": 0.9536636471748352, "learning_rate": 4.1033333333333336e-06, "loss": 0.0126, "step": 1770 }, { "epoch": 19.639389736477114, "eval_accuracy": 0.9905910344454658, "eval_f1": 0.9502199583236861, "eval_loss": 0.04928808659315109, "eval_precision": 0.9473684210526315, "eval_recall": 0.9530887134231305, "eval_runtime": 36.346, "eval_samples_per_second": 5.475, "eval_steps_per_second": 1.376, "step": 1770 }, { "epoch": 19.805825242718445, "grad_norm": 2.24277925491333, "learning_rate": 4.053333333333333e-06, "loss": 0.0167, "step": 1785 }, { "epoch": 19.805825242718445, "eval_accuracy": 0.9903747363867409, "eval_f1": 0.9519852262234534, "eval_loss": 0.04912427067756653, "eval_precision": 0.9463056447911886, "eval_recall": 0.9577333952624245, "eval_runtime": 36.3829, "eval_samples_per_second": 5.47, "eval_steps_per_second": 1.374, "step": 1785 }, { "epoch": 19.972260748959776, "grad_norm": 1.1929985284805298, "learning_rate": 4.003333333333334e-06, "loss": 0.0126, "step": 1800 }, { "epoch": 19.972260748959776, "eval_accuracy": 0.9907532579895095, "eval_f1": 0.9515867500579105, "eval_loss": 0.04741891101002693, "eval_precision": 0.9491682070240296, "eval_recall": 0.9540176497909894, "eval_runtime": 36.3224, "eval_samples_per_second": 5.479, "eval_steps_per_second": 1.377, "step": 1800 }, { "epoch": 20.13869625520111, "grad_norm": 0.5980396866798401, "learning_rate": 3.953333333333333e-06, "loss": 0.0107, "step": 1815 }, { "epoch": 20.13869625520111, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9550717924965262, "eval_loss": 0.04617602005600929, "eval_precision": 0.9524249422632795, "eval_recall": 0.9577333952624245, "eval_runtime": 36.2289, "eval_samples_per_second": 5.493, "eval_steps_per_second": 1.38, "step": 1815 }, { "epoch": 20.305131761442443, "grad_norm": 0.5774451494216919, "learning_rate": 3.903333333333334e-06, "loss": 0.0115, "step": 1830 }, { "epoch": 20.305131761442443, "eval_accuracy": 0.9911317795922782, "eval_f1": 0.9558993304086816, "eval_loss": 0.048068635165691376, "eval_precision": 0.9504132231404959, "eval_recall": 0.9614491407338597, "eval_runtime": 36.6091, "eval_samples_per_second": 5.436, "eval_steps_per_second": 1.366, "step": 1830 }, { "epoch": 20.471567267683774, "grad_norm": 0.8061049580574036, "learning_rate": 3.853333333333334e-06, "loss": 0.0128, "step": 1845 }, { "epoch": 20.471567267683774, "eval_accuracy": 0.9906991834748283, "eval_f1": 0.951918631530282, "eval_loss": 0.04859815165400505, "eval_precision": 0.9475379659456972, "eval_recall": 0.9563399907106364, "eval_runtime": 36.2061, "eval_samples_per_second": 5.496, "eval_steps_per_second": 1.381, "step": 1845 }, { "epoch": 20.638002773925106, "grad_norm": 0.5735962986946106, "learning_rate": 3.803333333333334e-06, "loss": 0.0113, "step": 1860 }, { "epoch": 20.638002773925106, "eval_accuracy": 0.9910236305629156, "eval_f1": 0.9533702677746998, "eval_loss": 0.04910165071487427, "eval_precision": 0.947682423129876, "eval_recall": 0.9591267998142127, "eval_runtime": 36.2871, "eval_samples_per_second": 5.484, "eval_steps_per_second": 1.378, "step": 1860 }, { "epoch": 20.804438280166437, "grad_norm": 0.5703373551368713, "learning_rate": 3.753333333333334e-06, "loss": 0.0119, "step": 1875 }, { "epoch": 20.804438280166437, "eval_accuracy": 0.9901043638133348, "eval_f1": 0.9498607242339832, "eval_loss": 0.05141424015164375, "eval_precision": 0.9494199535962877, "eval_recall": 0.9503019043195541, "eval_runtime": 36.2237, "eval_samples_per_second": 5.494, "eval_steps_per_second": 1.38, "step": 1875 }, { "epoch": 20.97087378640777, "grad_norm": 0.8812251091003418, "learning_rate": 3.7033333333333336e-06, "loss": 0.0122, "step": 1890 }, { "epoch": 20.97087378640777, "eval_accuracy": 0.9911317795922782, "eval_f1": 0.9535903948279844, "eval_loss": 0.04799521341919899, "eval_precision": 0.94811753902663, "eval_recall": 0.9591267998142127, "eval_runtime": 36.3818, "eval_samples_per_second": 5.47, "eval_steps_per_second": 1.374, "step": 1890 }, { "epoch": 21.1373092926491, "grad_norm": 0.729183554649353, "learning_rate": 3.6533333333333336e-06, "loss": 0.0123, "step": 1905 }, { "epoch": 21.1373092926491, "eval_accuracy": 0.9909154815335532, "eval_f1": 0.9522050334795659, "eval_loss": 0.04769909009337425, "eval_precision": 0.9467401285583104, "eval_recall": 0.9577333952624245, "eval_runtime": 36.5345, "eval_samples_per_second": 5.447, "eval_steps_per_second": 1.369, "step": 1905 }, { "epoch": 21.30374479889043, "grad_norm": 0.3428969085216522, "learning_rate": 3.6033333333333337e-06, "loss": 0.0116, "step": 1920 }, { "epoch": 21.30374479889043, "eval_accuracy": 0.9910236305629156, "eval_f1": 0.9533271719038817, "eval_loss": 0.04861655458807945, "eval_precision": 0.9485057471264368, "eval_recall": 0.9581978634463539, "eval_runtime": 36.8199, "eval_samples_per_second": 5.405, "eval_steps_per_second": 1.358, "step": 1920 }, { "epoch": 21.470180305131763, "grad_norm": 0.4823513925075531, "learning_rate": 3.5533333333333338e-06, "loss": 0.0108, "step": 1935 }, { "epoch": 21.470180305131763, "eval_accuracy": 0.9904828854161034, "eval_f1": 0.9511295527893039, "eval_loss": 0.048778366297483444, "eval_precision": 0.9441647597254005, "eval_recall": 0.9581978634463539, "eval_runtime": 36.5655, "eval_samples_per_second": 5.442, "eval_steps_per_second": 1.367, "step": 1935 }, { "epoch": 21.636615811373094, "grad_norm": 0.3686061203479767, "learning_rate": 3.5033333333333334e-06, "loss": 0.0115, "step": 1950 }, { "epoch": 21.636615811373094, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9542302357836339, "eval_loss": 0.04717012122273445, "eval_precision": 0.9498389323515877, "eval_recall": 0.9586623316302834, "eval_runtime": 36.5437, "eval_samples_per_second": 5.446, "eval_steps_per_second": 1.368, "step": 1950 }, { "epoch": 21.803051317614425, "grad_norm": 1.0370802879333496, "learning_rate": 3.4533333333333334e-06, "loss": 0.0083, "step": 1965 }, { "epoch": 21.803051317614425, "eval_accuracy": 0.991077705077597, "eval_f1": 0.954272517321016, "eval_loss": 0.04759324714541435, "eval_precision": 0.9490124023886082, "eval_recall": 0.9595912679981421, "eval_runtime": 36.2291, "eval_samples_per_second": 5.493, "eval_steps_per_second": 1.38, "step": 1965 }, { "epoch": 21.969486823855757, "grad_norm": 1.2627676725387573, "learning_rate": 3.4033333333333335e-06, "loss": 0.0094, "step": 1980 }, { "epoch": 21.969486823855757, "eval_accuracy": 0.990861407018872, "eval_f1": 0.9543147208121827, "eval_loss": 0.047525253146886826, "eval_precision": 0.948188904172398, "eval_recall": 0.9605202043660009, "eval_runtime": 36.2268, "eval_samples_per_second": 5.493, "eval_steps_per_second": 1.38, "step": 1980 }, { "epoch": 22.135922330097088, "grad_norm": 0.2426026463508606, "learning_rate": 3.3533333333333336e-06, "loss": 0.0118, "step": 1995 }, { "epoch": 22.135922330097088, "eval_accuracy": 0.9904288109014222, "eval_f1": 0.9501154734411085, "eval_loss": 0.049215689301490784, "eval_precision": 0.9448782728525493, "eval_recall": 0.9554110543427775, "eval_runtime": 36.1992, "eval_samples_per_second": 5.497, "eval_steps_per_second": 1.381, "step": 1995 }, { "epoch": 22.30235783633842, "grad_norm": 0.6006263494491577, "learning_rate": 3.303333333333333e-06, "loss": 0.01, "step": 2010 }, { "epoch": 22.30235783633842, "eval_accuracy": 0.990861407018872, "eval_f1": 0.9523148148148148, "eval_loss": 0.048562802374362946, "eval_precision": 0.949238578680203, "eval_recall": 0.9554110543427775, "eval_runtime": 36.2887, "eval_samples_per_second": 5.484, "eval_steps_per_second": 1.378, "step": 2010 }, { "epoch": 22.46879334257975, "grad_norm": 0.7383334040641785, "learning_rate": 3.2533333333333332e-06, "loss": 0.0114, "step": 2025 }, { "epoch": 22.46879334257975, "eval_accuracy": 0.9910236305629156, "eval_f1": 0.9539671524404348, "eval_loss": 0.04967198148369789, "eval_precision": 0.9502304147465438, "eval_recall": 0.9577333952624245, "eval_runtime": 36.3824, "eval_samples_per_second": 5.47, "eval_steps_per_second": 1.374, "step": 2025 }, { "epoch": 22.635228848821082, "grad_norm": 0.5105836987495422, "learning_rate": 3.2033333333333337e-06, "loss": 0.0091, "step": 2040 }, { "epoch": 22.635228848821082, "eval_accuracy": 0.9909695560482344, "eval_f1": 0.954209065679926, "eval_loss": 0.049895454198122025, "eval_precision": 0.9502533394748963, "eval_recall": 0.9581978634463539, "eval_runtime": 36.2966, "eval_samples_per_second": 5.483, "eval_steps_per_second": 1.378, "step": 2040 }, { "epoch": 22.801664355062414, "grad_norm": 0.8460143804550171, "learning_rate": 3.1533333333333338e-06, "loss": 0.0077, "step": 2055 }, { "epoch": 22.801664355062414, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9563409563409564, "eval_loss": 0.05023453012108803, "eval_precision": 0.9512867647058824, "eval_recall": 0.9614491407338597, "eval_runtime": 36.5792, "eval_samples_per_second": 5.44, "eval_steps_per_second": 1.367, "step": 2055 }, { "epoch": 22.968099861303745, "grad_norm": 0.46876421570777893, "learning_rate": 3.103333333333334e-06, "loss": 0.01, "step": 2070 }, { "epoch": 22.968099861303745, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9586127167630057, "eval_loss": 0.05132585018873215, "eval_precision": 0.9544198895027625, "eval_recall": 0.9628425452856479, "eval_runtime": 36.5943, "eval_samples_per_second": 5.438, "eval_steps_per_second": 1.366, "step": 2070 }, { "epoch": 23.134535367545077, "grad_norm": 0.26761332154273987, "learning_rate": 3.053333333333334e-06, "loss": 0.0087, "step": 2085 }, { "epoch": 23.134535367545077, "eval_accuracy": 0.9911858541069594, "eval_f1": 0.9554375432925422, "eval_loss": 0.04853161796927452, "eval_precision": 0.9499540863177227, "eval_recall": 0.9609846725499304, "eval_runtime": 36.2471, "eval_samples_per_second": 5.49, "eval_steps_per_second": 1.379, "step": 2085 }, { "epoch": 23.300970873786408, "grad_norm": 0.32841914892196655, "learning_rate": 3.0033333333333335e-06, "loss": 0.0073, "step": 2100 }, { "epoch": 23.300970873786408, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.959278111985192, "eval_loss": 0.04846283420920372, "eval_precision": 0.9557399723374828, "eval_recall": 0.9628425452856479, "eval_runtime": 36.0113, "eval_samples_per_second": 5.526, "eval_steps_per_second": 1.388, "step": 2100 }, { "epoch": 23.46740638002774, "grad_norm": 0.3114074766635895, "learning_rate": 2.9533333333333336e-06, "loss": 0.0083, "step": 2115 }, { "epoch": 23.46740638002774, "eval_accuracy": 0.9913480776510031, "eval_f1": 0.957205644228545, "eval_loss": 0.04847896471619606, "eval_precision": 0.9534562211981567, "eval_recall": 0.9609846725499304, "eval_runtime": 36.2766, "eval_samples_per_second": 5.486, "eval_steps_per_second": 1.378, "step": 2115 }, { "epoch": 23.63384188626907, "grad_norm": 0.815006673336029, "learning_rate": 2.9033333333333336e-06, "loss": 0.0117, "step": 2130 }, { "epoch": 23.63384188626907, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9590372598935432, "eval_loss": 0.04786692187190056, "eval_precision": 0.955719557195572, "eval_recall": 0.9623780771017185, "eval_runtime": 36.5057, "eval_samples_per_second": 5.451, "eval_steps_per_second": 1.37, "step": 2130 }, { "epoch": 23.800277392510402, "grad_norm": 0.34551236033439636, "learning_rate": 2.8533333333333337e-06, "loss": 0.0095, "step": 2145 }, { "epoch": 23.800277392510402, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9542302357836339, "eval_loss": 0.05084284767508507, "eval_precision": 0.9498389323515877, "eval_recall": 0.9586623316302834, "eval_runtime": 36.5022, "eval_samples_per_second": 5.452, "eval_steps_per_second": 1.37, "step": 2145 }, { "epoch": 23.966712898751734, "grad_norm": 0.988761305809021, "learning_rate": 2.8033333333333333e-06, "loss": 0.009, "step": 2160 }, { "epoch": 23.966712898751734, "eval_accuracy": 0.9909695560482344, "eval_f1": 0.9559603412497119, "eval_loss": 0.051338665187358856, "eval_precision": 0.9491758241758241, "eval_recall": 0.9628425452856479, "eval_runtime": 36.4961, "eval_samples_per_second": 5.453, "eval_steps_per_second": 1.37, "step": 2160 }, { "epoch": 24.133148404993065, "grad_norm": 0.20439928770065308, "learning_rate": 2.7533333333333334e-06, "loss": 0.0077, "step": 2175 }, { "epoch": 24.133148404993065, "eval_accuracy": 0.9915103011950468, "eval_f1": 0.9590562109646079, "eval_loss": 0.050405893474817276, "eval_precision": 0.9552995391705069, "eval_recall": 0.9628425452856479, "eval_runtime": 36.5792, "eval_samples_per_second": 5.44, "eval_steps_per_second": 1.367, "step": 2175 }, { "epoch": 24.299583911234397, "grad_norm": 0.6065575480461121, "learning_rate": 2.7033333333333334e-06, "loss": 0.0087, "step": 2190 }, { "epoch": 24.299583911234397, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9565418400369857, "eval_loss": 0.04999900609254837, "eval_precision": 0.9521398987574782, "eval_recall": 0.9609846725499304, "eval_runtime": 36.2889, "eval_samples_per_second": 5.484, "eval_steps_per_second": 1.378, "step": 2190 }, { "epoch": 24.466019417475728, "grad_norm": 0.4505390226840973, "learning_rate": 2.6533333333333335e-06, "loss": 0.0068, "step": 2205 }, { "epoch": 24.466019417475728, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9574271170754282, "eval_loss": 0.05055619403719902, "eval_precision": 0.9538958045182112, "eval_recall": 0.9609846725499304, "eval_runtime": 35.8763, "eval_samples_per_second": 5.547, "eval_steps_per_second": 1.394, "step": 2205 }, { "epoch": 24.63245492371706, "grad_norm": 0.2784092128276825, "learning_rate": 2.603333333333334e-06, "loss": 0.0094, "step": 2220 }, { "epoch": 24.63245492371706, "eval_accuracy": 0.9913480776510031, "eval_f1": 0.9549132947976879, "eval_loss": 0.050024211406707764, "eval_precision": 0.9507366482504604, "eval_recall": 0.9591267998142127, "eval_runtime": 36.3659, "eval_samples_per_second": 5.472, "eval_steps_per_second": 1.375, "step": 2220 }, { "epoch": 24.79889042995839, "grad_norm": 0.24667127430438995, "learning_rate": 2.5533333333333336e-06, "loss": 0.0088, "step": 2235 }, { "epoch": 24.79889042995839, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9551548774849746, "eval_loss": 0.048643559217453, "eval_precision": 0.9507593189139438, "eval_recall": 0.9595912679981421, "eval_runtime": 36.4912, "eval_samples_per_second": 5.453, "eval_steps_per_second": 1.37, "step": 2235 }, { "epoch": 24.965325936199722, "grad_norm": 0.10884588211774826, "learning_rate": 2.5033333333333336e-06, "loss": 0.0089, "step": 2250 }, { "epoch": 24.965325936199722, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9558789558789559, "eval_loss": 0.05070747807621956, "eval_precision": 0.9508272058823529, "eval_recall": 0.9609846725499304, "eval_runtime": 36.2816, "eval_samples_per_second": 5.485, "eval_steps_per_second": 1.378, "step": 2250 }, { "epoch": 25.131761442441054, "grad_norm": 0.6150490641593933, "learning_rate": 2.4533333333333333e-06, "loss": 0.0063, "step": 2265 }, { "epoch": 25.131761442441054, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9585360203845263, "eval_loss": 0.04789712280035019, "eval_precision": 0.9560998151571165, "eval_recall": 0.9609846725499304, "eval_runtime": 36.2469, "eval_samples_per_second": 5.49, "eval_steps_per_second": 1.379, "step": 2265 }, { "epoch": 25.298196948682385, "grad_norm": 0.7432591319084167, "learning_rate": 2.4033333333333338e-06, "loss": 0.0058, "step": 2280 }, { "epoch": 25.298196948682385, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9572452045296973, "eval_loss": 0.050580546259880066, "eval_precision": 0.952621895124195, "eval_recall": 0.9619136089177891, "eval_runtime": 36.4588, "eval_samples_per_second": 5.458, "eval_steps_per_second": 1.371, "step": 2280 }, { "epoch": 25.464632454923716, "grad_norm": 0.742586612701416, "learning_rate": 2.3533333333333334e-06, "loss": 0.0102, "step": 2295 }, { "epoch": 25.464632454923716, "eval_accuracy": 0.9912399286216407, "eval_f1": 0.9574861367837338, "eval_loss": 0.04992222413420677, "eval_precision": 0.9526436781609195, "eval_recall": 0.9623780771017185, "eval_runtime": 36.5208, "eval_samples_per_second": 5.449, "eval_steps_per_second": 1.369, "step": 2295 }, { "epoch": 25.631067961165048, "grad_norm": 0.9237321019172668, "learning_rate": 2.3033333333333334e-06, "loss": 0.0079, "step": 2310 }, { "epoch": 25.631067961165048, "eval_accuracy": 0.9905369599307846, "eval_f1": 0.9541368979027426, "eval_loss": 0.05427027493715286, "eval_precision": 0.9469350411710887, "eval_recall": 0.9614491407338597, "eval_runtime": 36.0226, "eval_samples_per_second": 5.524, "eval_steps_per_second": 1.388, "step": 2310 }, { "epoch": 25.79750346740638, "grad_norm": 0.2974264621734619, "learning_rate": 2.2533333333333335e-06, "loss": 0.009, "step": 2325 }, { "epoch": 25.79750346740638, "eval_accuracy": 0.9914562266803656, "eval_f1": 0.9572452045296973, "eval_loss": 0.049834854900836945, "eval_precision": 0.952621895124195, "eval_recall": 0.9619136089177891, "eval_runtime": 36.6625, "eval_samples_per_second": 5.428, "eval_steps_per_second": 1.364, "step": 2325 }, { "epoch": 25.96393897364771, "grad_norm": 0.6791291236877441, "learning_rate": 2.2033333333333336e-06, "loss": 0.0068, "step": 2340 }, { "epoch": 25.96393897364771, "eval_accuracy": 0.991077705077597, "eval_f1": 0.9563611175248211, "eval_loss": 0.05109778791666031, "eval_precision": 0.950872359963269, "eval_recall": 0.9619136089177891, "eval_runtime": 35.8655, "eval_samples_per_second": 5.549, "eval_steps_per_second": 1.394, "step": 2340 }, { "epoch": 26.130374479889042, "grad_norm": 0.5723872184753418, "learning_rate": 2.153333333333333e-06, "loss": 0.007, "step": 2355 }, { "epoch": 26.130374479889042, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9579676674364895, "eval_loss": 0.049178168177604675, "eval_precision": 0.9526871841984382, "eval_recall": 0.9633070134695774, "eval_runtime": 35.9503, "eval_samples_per_second": 5.535, "eval_steps_per_second": 1.391, "step": 2355 }, { "epoch": 26.296809986130373, "grad_norm": 0.3830583393573761, "learning_rate": 2.1033333333333337e-06, "loss": 0.0086, "step": 2370 }, { "epoch": 26.296809986130373, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9554375432925422, "eval_loss": 0.05156167596578598, "eval_precision": 0.9499540863177227, "eval_recall": 0.9609846725499304, "eval_runtime": 35.8417, "eval_samples_per_second": 5.552, "eval_steps_per_second": 1.395, "step": 2370 }, { "epoch": 26.463245492371705, "grad_norm": 0.14329634606838226, "learning_rate": 2.0533333333333337e-06, "loss": 0.0078, "step": 2385 }, { "epoch": 26.463245492371705, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9556581986143187, "eval_loss": 0.05027909576892853, "eval_precision": 0.9503904455672945, "eval_recall": 0.9609846725499304, "eval_runtime": 35.8472, "eval_samples_per_second": 5.551, "eval_steps_per_second": 1.395, "step": 2385 }, { "epoch": 26.629680998613036, "grad_norm": 0.17582757771015167, "learning_rate": 2.0033333333333334e-06, "loss": 0.0067, "step": 2400 }, { "epoch": 26.629680998613036, "eval_accuracy": 0.9915103011950468, "eval_f1": 0.9577269577269578, "eval_loss": 0.05140436813235283, "eval_precision": 0.9526654411764706, "eval_recall": 0.9628425452856479, "eval_runtime": 35.8691, "eval_samples_per_second": 5.548, "eval_steps_per_second": 1.394, "step": 2400 }, { "epoch": 26.796116504854368, "grad_norm": 0.6374102830886841, "learning_rate": 1.9533333333333334e-06, "loss": 0.0059, "step": 2415 }, { "epoch": 26.796116504854368, "eval_accuracy": 0.9918888227978154, "eval_f1": 0.9588344125809436, "eval_loss": 0.05035752058029175, "eval_precision": 0.9548595117457392, "eval_recall": 0.9628425452856479, "eval_runtime": 35.7794, "eval_samples_per_second": 5.562, "eval_steps_per_second": 1.397, "step": 2415 }, { "epoch": 26.9625520110957, "grad_norm": 0.5752395987510681, "learning_rate": 1.9033333333333335e-06, "loss": 0.0089, "step": 2430 }, { "epoch": 26.9625520110957, "eval_accuracy": 0.9916184502244092, "eval_f1": 0.9560795191863154, "eval_loss": 0.051971472799777985, "eval_precision": 0.9516797054763001, "eval_recall": 0.9605202043660009, "eval_runtime": 36.1279, "eval_samples_per_second": 5.508, "eval_steps_per_second": 1.384, "step": 2430 }, { "epoch": 27.12898751733703, "grad_norm": 0.40148672461509705, "learning_rate": 1.8533333333333333e-06, "loss": 0.0059, "step": 2445 }, { "epoch": 27.12898751733703, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9572649572649573, "eval_loss": 0.05115849897265434, "eval_precision": 0.9522058823529411, "eval_recall": 0.9623780771017185, "eval_runtime": 36.2324, "eval_samples_per_second": 5.492, "eval_steps_per_second": 1.38, "step": 2445 }, { "epoch": 27.295423023578362, "grad_norm": 0.19672174751758575, "learning_rate": 1.8033333333333336e-06, "loss": 0.0073, "step": 2460 }, { "epoch": 27.295423023578362, "eval_accuracy": 0.9916184502244092, "eval_f1": 0.9569842738205366, "eval_loss": 0.05259764939546585, "eval_precision": 0.9530170428374021, "eval_recall": 0.9609846725499304, "eval_runtime": 36.3768, "eval_samples_per_second": 5.471, "eval_steps_per_second": 1.375, "step": 2460 }, { "epoch": 27.461858529819693, "grad_norm": 1.178671956062317, "learning_rate": 1.7533333333333336e-06, "loss": 0.0065, "step": 2475 }, { "epoch": 27.461858529819693, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9577269577269578, "eval_loss": 0.052951879799366, "eval_precision": 0.9526654411764706, "eval_recall": 0.9628425452856479, "eval_runtime": 36.7573, "eval_samples_per_second": 5.414, "eval_steps_per_second": 1.36, "step": 2475 }, { "epoch": 27.628294036061025, "grad_norm": 0.8156425356864929, "learning_rate": 1.7033333333333335e-06, "loss": 0.0064, "step": 2490 }, { "epoch": 27.628294036061025, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.957205644228545, "eval_loss": 0.05146779865026474, "eval_precision": 0.9534562211981567, "eval_recall": 0.9609846725499304, "eval_runtime": 35.9308, "eval_samples_per_second": 5.538, "eval_steps_per_second": 1.392, "step": 2490 }, { "epoch": 27.794729542302356, "grad_norm": 0.4098323881626129, "learning_rate": 1.6533333333333335e-06, "loss": 0.0072, "step": 2505 }, { "epoch": 27.794729542302356, "eval_accuracy": 0.9906991834748283, "eval_f1": 0.9545559400230681, "eval_loss": 0.054223690181970596, "eval_precision": 0.9482126489459212, "eval_recall": 0.9609846725499304, "eval_runtime": 35.9196, "eval_samples_per_second": 5.54, "eval_steps_per_second": 1.392, "step": 2505 }, { "epoch": 27.96116504854369, "grad_norm": 0.5159748792648315, "learning_rate": 1.6033333333333334e-06, "loss": 0.0066, "step": 2520 }, { "epoch": 27.96116504854369, "eval_accuracy": 0.990861407018872, "eval_f1": 0.9549965381952458, "eval_loss": 0.05374361574649811, "eval_precision": 0.9490825688073394, "eval_recall": 0.9609846725499304, "eval_runtime": 35.7031, "eval_samples_per_second": 5.574, "eval_steps_per_second": 1.4, "step": 2520 }, { "epoch": 28.127600554785023, "grad_norm": 0.499012291431427, "learning_rate": 1.5533333333333334e-06, "loss": 0.006, "step": 2535 }, { "epoch": 28.127600554785023, "eval_accuracy": 0.9915103011950468, "eval_f1": 0.9579482439926063, "eval_loss": 0.05182594433426857, "eval_precision": 0.953103448275862, "eval_recall": 0.9628425452856479, "eval_runtime": 35.8174, "eval_samples_per_second": 5.556, "eval_steps_per_second": 1.396, "step": 2535 }, { "epoch": 28.294036061026354, "grad_norm": 0.5842483639717102, "learning_rate": 1.5033333333333337e-06, "loss": 0.0074, "step": 2550 }, { "epoch": 28.294036061026354, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9565418400369857, "eval_loss": 0.05230095610022545, "eval_precision": 0.9521398987574782, "eval_recall": 0.9609846725499304, "eval_runtime": 35.928, "eval_samples_per_second": 5.539, "eval_steps_per_second": 1.392, "step": 2550 }, { "epoch": 28.460471567267685, "grad_norm": 0.4897175431251526, "learning_rate": 1.4533333333333335e-06, "loss": 0.0068, "step": 2565 }, { "epoch": 28.460471567267685, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.955458112162474, "eval_loss": 0.05341142788529396, "eval_precision": 0.9495412844036697, "eval_recall": 0.9614491407338597, "eval_runtime": 36.0278, "eval_samples_per_second": 5.524, "eval_steps_per_second": 1.388, "step": 2565 }, { "epoch": 28.626907073509017, "grad_norm": 0.4191240668296814, "learning_rate": 1.4033333333333336e-06, "loss": 0.0055, "step": 2580 }, { "epoch": 28.626907073509017, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9583526145303101, "eval_loss": 0.05210199952125549, "eval_precision": 0.954817888427847, "eval_recall": 0.9619136089177891, "eval_runtime": 36.2636, "eval_samples_per_second": 5.488, "eval_steps_per_second": 1.379, "step": 2580 }, { "epoch": 28.793342579750348, "grad_norm": 0.6655350923538208, "learning_rate": 1.3533333333333334e-06, "loss": 0.0056, "step": 2595 }, { "epoch": 28.793342579750348, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9567829905246129, "eval_loss": 0.05259960889816284, "eval_precision": 0.952161913523459, "eval_recall": 0.9614491407338597, "eval_runtime": 36.0456, "eval_samples_per_second": 5.521, "eval_steps_per_second": 1.387, "step": 2595 }, { "epoch": 28.95977808599168, "grad_norm": 0.9510291814804077, "learning_rate": 1.3033333333333335e-06, "loss": 0.0066, "step": 2610 }, { "epoch": 28.95977808599168, "eval_accuracy": 0.9913480776510031, "eval_f1": 0.9570240295748613, "eval_loss": 0.05272991955280304, "eval_precision": 0.952183908045977, "eval_recall": 0.9619136089177891, "eval_runtime": 36.3753, "eval_samples_per_second": 5.471, "eval_steps_per_second": 1.375, "step": 2610 }, { "epoch": 29.12621359223301, "grad_norm": 0.33463072776794434, "learning_rate": 1.2533333333333333e-06, "loss": 0.0053, "step": 2625 }, { "epoch": 29.12621359223301, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9579482439926063, "eval_loss": 0.0533275306224823, "eval_precision": 0.953103448275862, "eval_recall": 0.9628425452856479, "eval_runtime": 35.8945, "eval_samples_per_second": 5.544, "eval_steps_per_second": 1.393, "step": 2625 }, { "epoch": 29.292649098474342, "grad_norm": 0.2936910092830658, "learning_rate": 1.2033333333333334e-06, "loss": 0.0063, "step": 2640 }, { "epoch": 29.292649098474342, "eval_accuracy": 0.9912940031363219, "eval_f1": 0.9569842738205366, "eval_loss": 0.05200694501399994, "eval_precision": 0.9530170428374021, "eval_recall": 0.9609846725499304, "eval_runtime": 35.7745, "eval_samples_per_second": 5.563, "eval_steps_per_second": 1.398, "step": 2640 }, { "epoch": 29.459084604715674, "grad_norm": 0.45608168840408325, "learning_rate": 1.1533333333333334e-06, "loss": 0.0059, "step": 2655 }, { "epoch": 29.459084604715674, "eval_accuracy": 0.9910236305629156, "eval_f1": 0.9554169554169554, "eval_loss": 0.0532723143696785, "eval_precision": 0.9503676470588235, "eval_recall": 0.9605202043660009, "eval_runtime": 35.9196, "eval_samples_per_second": 5.54, "eval_steps_per_second": 1.392, "step": 2655 }, { "epoch": 29.625520110957005, "grad_norm": 0.46974512934684753, "learning_rate": 1.1033333333333335e-06, "loss": 0.0059, "step": 2670 }, { "epoch": 29.625520110957005, "eval_accuracy": 0.9911858541069594, "eval_f1": 0.9572452045296973, "eval_loss": 0.05324824899435043, "eval_precision": 0.952621895124195, "eval_recall": 0.9619136089177891, "eval_runtime": 36.0296, "eval_samples_per_second": 5.523, "eval_steps_per_second": 1.388, "step": 2670 }, { "epoch": 29.791955617198337, "grad_norm": 0.6280196309089661, "learning_rate": 1.0533333333333333e-06, "loss": 0.0062, "step": 2685 }, { "epoch": 29.791955617198337, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9579288025889968, "eval_loss": 0.05163406580686569, "eval_precision": 0.9535204786010124, "eval_recall": 0.9623780771017185, "eval_runtime": 35.8797, "eval_samples_per_second": 5.546, "eval_steps_per_second": 1.394, "step": 2685 }, { "epoch": 29.958391123439668, "grad_norm": 0.3609830439090729, "learning_rate": 1.0033333333333334e-06, "loss": 0.0064, "step": 2700 }, { "epoch": 29.958391123439668, "eval_accuracy": 0.9914562266803656, "eval_f1": 0.9572649572649573, "eval_loss": 0.05152719095349312, "eval_precision": 0.9522058823529411, "eval_recall": 0.9623780771017185, "eval_runtime": 36.0059, "eval_samples_per_second": 5.527, "eval_steps_per_second": 1.389, "step": 2700 }, { "epoch": 30.124826629681, "grad_norm": 0.37590721249580383, "learning_rate": 9.533333333333335e-07, "loss": 0.0055, "step": 2715 }, { "epoch": 30.124826629681, "eval_accuracy": 0.9917265992537717, "eval_f1": 0.9590751445086704, "eval_loss": 0.05128318816423416, "eval_precision": 0.9548802946593001, "eval_recall": 0.9633070134695774, "eval_runtime": 36.0097, "eval_samples_per_second": 5.526, "eval_steps_per_second": 1.389, "step": 2715 }, { "epoch": 30.29126213592233, "grad_norm": 0.4574069678783417, "learning_rate": 9.033333333333334e-07, "loss": 0.0064, "step": 2730 }, { "epoch": 30.29126213592233, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9583911234396673, "eval_loss": 0.052385713905096054, "eval_precision": 0.9539806718821905, "eval_recall": 0.9628425452856479, "eval_runtime": 35.8265, "eval_samples_per_second": 5.555, "eval_steps_per_second": 1.396, "step": 2730 }, { "epoch": 30.457697642163662, "grad_norm": 1.509279489517212, "learning_rate": 8.533333333333334e-07, "loss": 0.0055, "step": 2745 }, { "epoch": 30.457697642163662, "eval_accuracy": 0.9915103011950468, "eval_f1": 0.9581889581889582, "eval_loss": 0.05304015427827835, "eval_precision": 0.953125, "eval_recall": 0.9633070134695774, "eval_runtime": 35.8068, "eval_samples_per_second": 5.558, "eval_steps_per_second": 1.396, "step": 2745 }, { "epoch": 30.624133148404994, "grad_norm": 0.08701591938734055, "learning_rate": 8.033333333333335e-07, "loss": 0.0065, "step": 2760 }, { "epoch": 30.624133148404994, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9588914549653579, "eval_loss": 0.05279012396931648, "eval_precision": 0.9536058796508957, "eval_recall": 0.9642359498374361, "eval_runtime": 36.0763, "eval_samples_per_second": 5.516, "eval_steps_per_second": 1.386, "step": 2760 }, { "epoch": 30.790568654646325, "grad_norm": 0.39128488302230835, "learning_rate": 7.533333333333335e-07, "loss": 0.0068, "step": 2775 }, { "epoch": 30.790568654646325, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9575253924284395, "eval_loss": 0.05296061187982559, "eval_precision": 0.9518127581459385, "eval_recall": 0.9633070134695774, "eval_runtime": 35.9916, "eval_samples_per_second": 5.529, "eval_steps_per_second": 1.389, "step": 2775 }, { "epoch": 30.957004160887656, "grad_norm": 0.20628976821899414, "learning_rate": 7.033333333333334e-07, "loss": 0.0047, "step": 2790 }, { "epoch": 30.957004160887656, "eval_accuracy": 0.991564375709728, "eval_f1": 0.958910433979686, "eval_loss": 0.05448687821626663, "eval_precision": 0.953189536484626, "eval_recall": 0.9647004180213655, "eval_runtime": 35.9295, "eval_samples_per_second": 5.539, "eval_steps_per_second": 1.392, "step": 2790 }, { "epoch": 31.123439667128988, "grad_norm": 0.3910321295261383, "learning_rate": 6.533333333333334e-07, "loss": 0.0051, "step": 2805 }, { "epoch": 31.123439667128988, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9595749595749595, "eval_loss": 0.05336242541670799, "eval_precision": 0.9545036764705882, "eval_recall": 0.9647004180213655, "eval_runtime": 36.0288, "eval_samples_per_second": 5.523, "eval_steps_per_second": 1.388, "step": 2805 }, { "epoch": 31.28987517337032, "grad_norm": 0.2049601525068283, "learning_rate": 6.033333333333334e-07, "loss": 0.0044, "step": 2820 }, { "epoch": 31.28987517337032, "eval_accuracy": 0.9914021521656843, "eval_f1": 0.9581889581889582, "eval_loss": 0.053161416202783585, "eval_precision": 0.953125, "eval_recall": 0.9633070134695774, "eval_runtime": 35.9772, "eval_samples_per_second": 5.531, "eval_steps_per_second": 1.39, "step": 2820 }, { "epoch": 31.45631067961165, "grad_norm": 0.4429149329662323, "learning_rate": 5.533333333333334e-07, "loss": 0.0068, "step": 2835 }, { "epoch": 31.45631067961165, "eval_accuracy": 0.9913480776510031, "eval_f1": 0.9579676674364895, "eval_loss": 0.05317556858062744, "eval_precision": 0.9526871841984382, "eval_recall": 0.9633070134695774, "eval_runtime": 35.6808, "eval_samples_per_second": 5.577, "eval_steps_per_second": 1.401, "step": 2835 }, { "epoch": 31.622746185852982, "grad_norm": 0.4102032482624054, "learning_rate": 5.033333333333334e-07, "loss": 0.0045, "step": 2850 }, { "epoch": 31.622746185852982, "eval_accuracy": 0.9915103011950468, "eval_f1": 0.9590940605500345, "eval_loss": 0.053103264421224594, "eval_precision": 0.9544618215271389, "eval_recall": 0.9637714816535068, "eval_runtime": 35.706, "eval_samples_per_second": 5.573, "eval_steps_per_second": 1.4, "step": 2850 }, { "epoch": 31.789181692094314, "grad_norm": 0.8468719720840454, "learning_rate": 4.533333333333334e-07, "loss": 0.0047, "step": 2865 }, { "epoch": 31.789181692094314, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9586318465449504, "eval_loss": 0.05298003926873207, "eval_precision": 0.954001839926403, "eval_recall": 0.9633070134695774, "eval_runtime": 35.8749, "eval_samples_per_second": 5.547, "eval_steps_per_second": 1.394, "step": 2865 }, { "epoch": 31.955617198335645, "grad_norm": 0.2063705176115036, "learning_rate": 4.0333333333333337e-07, "loss": 0.0075, "step": 2880 }, { "epoch": 31.955617198335645, "eval_accuracy": 0.9916184502244092, "eval_f1": 0.9593157651410079, "eval_loss": 0.05329431965947151, "eval_precision": 0.9549010584445468, "eval_recall": 0.9637714816535068, "eval_runtime": 36.0809, "eval_samples_per_second": 5.515, "eval_steps_per_second": 1.386, "step": 2880 }, { "epoch": 32.12205270457698, "grad_norm": 0.3478763997554779, "learning_rate": 3.533333333333334e-07, "loss": 0.0055, "step": 2895 }, { "epoch": 32.12205270457698, "eval_accuracy": 0.9917265992537717, "eval_f1": 0.9595375722543353, "eval_loss": 0.05245138704776764, "eval_precision": 0.9553406998158379, "eval_recall": 0.9637714816535068, "eval_runtime": 35.9673, "eval_samples_per_second": 5.533, "eval_steps_per_second": 1.39, "step": 2895 }, { "epoch": 32.28848821081831, "grad_norm": 0.721191942691803, "learning_rate": 3.033333333333334e-07, "loss": 0.006, "step": 2910 }, { "epoch": 32.28848821081831, "eval_accuracy": 0.9917265992537717, "eval_f1": 0.9595375722543353, "eval_loss": 0.05226488783955574, "eval_precision": 0.9553406998158379, "eval_recall": 0.9637714816535068, "eval_runtime": 35.9385, "eval_samples_per_second": 5.537, "eval_steps_per_second": 1.391, "step": 2910 }, { "epoch": 32.45492371705964, "grad_norm": 0.3022706210613251, "learning_rate": 2.533333333333333e-07, "loss": 0.0062, "step": 2925 }, { "epoch": 32.45492371705964, "eval_accuracy": 0.9916725247390905, "eval_f1": 0.9588534442903375, "eval_loss": 0.05245348811149597, "eval_precision": 0.9544408651633686, "eval_recall": 0.9633070134695774, "eval_runtime": 35.8766, "eval_samples_per_second": 5.547, "eval_steps_per_second": 1.394, "step": 2925 }, { "epoch": 32.62135922330097, "grad_norm": 0.4700392186641693, "learning_rate": 2.0333333333333333e-07, "loss": 0.0059, "step": 2940 }, { "epoch": 32.62135922330097, "eval_accuracy": 0.9917265992537717, "eval_f1": 0.9593157651410079, "eval_loss": 0.05246575176715851, "eval_precision": 0.9549010584445468, "eval_recall": 0.9637714816535068, "eval_runtime": 35.8779, "eval_samples_per_second": 5.547, "eval_steps_per_second": 1.394, "step": 2940 }, { "epoch": 32.787794729542306, "grad_norm": 0.7413909435272217, "learning_rate": 1.5333333333333333e-07, "loss": 0.0058, "step": 2955 }, { "epoch": 32.787794729542306, "eval_accuracy": 0.9917265992537717, "eval_f1": 0.959556274555119, "eval_loss": 0.053051915019750595, "eval_precision": 0.9549218031278749, "eval_recall": 0.9642359498374361, "eval_runtime": 35.8838, "eval_samples_per_second": 5.546, "eval_steps_per_second": 1.393, "step": 2955 }, { "epoch": 32.95423023578363, "grad_norm": 0.7399964332580566, "learning_rate": 1.0333333333333335e-07, "loss": 0.005, "step": 2970 }, { "epoch": 32.95423023578363, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9584103512014789, "eval_loss": 0.05329006537795067, "eval_precision": 0.9535632183908046, "eval_recall": 0.9633070134695774, "eval_runtime": 35.9193, "eval_samples_per_second": 5.54, "eval_steps_per_second": 1.392, "step": 2970 }, { "epoch": 33.12066574202497, "grad_norm": 0.3113914728164673, "learning_rate": 5.3333333333333334e-08, "loss": 0.007, "step": 2985 }, { "epoch": 33.12066574202497, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9584103512014789, "eval_loss": 0.05327802523970604, "eval_precision": 0.9535632183908046, "eval_recall": 0.9633070134695774, "eval_runtime": 35.914, "eval_samples_per_second": 5.541, "eval_steps_per_second": 1.392, "step": 2985 }, { "epoch": 33.287101248266296, "grad_norm": 0.33092889189720154, "learning_rate": 3.3333333333333334e-09, "loss": 0.0047, "step": 3000 }, { "epoch": 33.287101248266296, "eval_accuracy": 0.991564375709728, "eval_f1": 0.9584103512014789, "eval_loss": 0.05324762314558029, "eval_precision": 0.9535632183908046, "eval_recall": 0.9633070134695774, "eval_runtime": 35.8608, "eval_samples_per_second": 5.549, "eval_steps_per_second": 1.394, "step": 3000 }, { "epoch": 33.287101248266296, "step": 3000, "total_flos": 8.9780255686656e+16, "train_loss": 0.0764370101193587, "train_runtime": 55223.9534, "train_samples_per_second": 1.738, "train_steps_per_second": 0.054 } ], "logging_steps": 15, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 34, "save_steps": 15, "total_flos": 8.9780255686656e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }