|
{ |
|
"best_metric": 0.9595749595749595, |
|
"best_model_checkpoint": "ds-v6-large/checkpoint-2805", |
|
"epoch": 33.287101248266296, |
|
"eval_steps": 15, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1664355062413315, |
|
"grad_norm": 2.6665048599243164, |
|
"learning_rate": 9.950000000000001e-06, |
|
"loss": 1.9852, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1664355062413315, |
|
"eval_accuracy": 0.8101443789541989, |
|
"eval_f1": 0.0, |
|
"eval_loss": 1.1499630212783813, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 35.775, |
|
"eval_samples_per_second": 5.563, |
|
"eval_steps_per_second": 1.398, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.332871012482663, |
|
"grad_norm": 2.08683443069458, |
|
"learning_rate": 9.9e-06, |
|
"loss": 1.0244, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.332871012482663, |
|
"eval_accuracy": 0.8122532850267669, |
|
"eval_f1": 0.012939749292357462, |
|
"eval_loss": 0.834208607673645, |
|
"eval_precision": 0.05, |
|
"eval_recall": 0.0074314909428704135, |
|
"eval_runtime": 35.1999, |
|
"eval_samples_per_second": 5.653, |
|
"eval_steps_per_second": 1.42, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49930651872399445, |
|
"grad_norm": 2.1074297428131104, |
|
"learning_rate": 9.85e-06, |
|
"loss": 0.7826, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.49930651872399445, |
|
"eval_accuracy": 0.8479424647163791, |
|
"eval_f1": 0.09320905459387482, |
|
"eval_loss": 0.6794766187667847, |
|
"eval_precision": 0.07893041237113402, |
|
"eval_recall": 0.1137947050627032, |
|
"eval_runtime": 35.557, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 1.406, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.665742024965326, |
|
"grad_norm": 1.8543498516082764, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.6767, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.665742024965326, |
|
"eval_accuracy": 0.8578381009030444, |
|
"eval_f1": 0.13828125, |
|
"eval_loss": 0.5963338613510132, |
|
"eval_precision": 0.11931243680485339, |
|
"eval_recall": 0.1644217371110079, |
|
"eval_runtime": 35.7897, |
|
"eval_samples_per_second": 5.56, |
|
"eval_steps_per_second": 1.397, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8321775312066574, |
|
"grad_norm": 1.9123793840408325, |
|
"learning_rate": 9.75e-06, |
|
"loss": 0.6031, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.8321775312066574, |
|
"eval_accuracy": 0.8690855999567404, |
|
"eval_f1": 0.1916831683168317, |
|
"eval_loss": 0.5405648946762085, |
|
"eval_precision": 0.1670693821194339, |
|
"eval_recall": 0.22480260102183, |
|
"eval_runtime": 35.6034, |
|
"eval_samples_per_second": 5.589, |
|
"eval_steps_per_second": 1.404, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9986130374479889, |
|
"grad_norm": 1.9384328126907349, |
|
"learning_rate": 9.7e-06, |
|
"loss": 0.5756, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9986130374479889, |
|
"eval_accuracy": 0.8777915968204185, |
|
"eval_f1": 0.26393539491825885, |
|
"eval_loss": 0.49346938729286194, |
|
"eval_precision": 0.22913816689466485, |
|
"eval_recall": 0.3111936832326986, |
|
"eval_runtime": 35.6547, |
|
"eval_samples_per_second": 5.581, |
|
"eval_steps_per_second": 1.402, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1650485436893203, |
|
"grad_norm": 1.751382827758789, |
|
"learning_rate": 9.65e-06, |
|
"loss": 0.5215, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.1650485436893203, |
|
"eval_accuracy": 0.8904991077705078, |
|
"eval_f1": 0.3575184016824396, |
|
"eval_loss": 0.43015486001968384, |
|
"eval_precision": 0.32667179093005383, |
|
"eval_recall": 0.3947979563399907, |
|
"eval_runtime": 35.5042, |
|
"eval_samples_per_second": 5.605, |
|
"eval_steps_per_second": 1.408, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.331484049930652, |
|
"grad_norm": 1.7430224418640137, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.4782, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.331484049930652, |
|
"eval_accuracy": 0.9020169793976099, |
|
"eval_f1": 0.4266553119012136, |
|
"eval_loss": 0.37819600105285645, |
|
"eval_precision": 0.3938679245283019, |
|
"eval_recall": 0.46539712029725966, |
|
"eval_runtime": 35.9551, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 1.391, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.4979195561719834, |
|
"grad_norm": 2.754100799560547, |
|
"learning_rate": 9.55e-06, |
|
"loss": 0.4208, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.4979195561719834, |
|
"eval_accuracy": 0.9080733250419077, |
|
"eval_f1": 0.44783505154639175, |
|
"eval_loss": 0.34046444296836853, |
|
"eval_precision": 0.40266963292547275, |
|
"eval_recall": 0.5044124477473293, |
|
"eval_runtime": 35.1015, |
|
"eval_samples_per_second": 5.669, |
|
"eval_steps_per_second": 1.424, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.664355062413315, |
|
"grad_norm": 1.3271350860595703, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.3532, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.664355062413315, |
|
"eval_accuracy": 0.9251608716811767, |
|
"eval_f1": 0.5355845266082496, |
|
"eval_loss": 0.2930045425891876, |
|
"eval_precision": 0.49604117181314333, |
|
"eval_recall": 0.5819786344635393, |
|
"eval_runtime": 34.7775, |
|
"eval_samples_per_second": 5.722, |
|
"eval_steps_per_second": 1.438, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8307905686546464, |
|
"grad_norm": 1.9117140769958496, |
|
"learning_rate": 9.450000000000001e-06, |
|
"loss": 0.3458, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.8307905686546464, |
|
"eval_accuracy": 0.9301357270318499, |
|
"eval_f1": 0.5559597688850845, |
|
"eval_loss": 0.2658008933067322, |
|
"eval_precision": 0.5154761904761904, |
|
"eval_recall": 0.6033441709242917, |
|
"eval_runtime": 34.9787, |
|
"eval_samples_per_second": 5.689, |
|
"eval_steps_per_second": 1.429, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.9972260748959778, |
|
"grad_norm": 1.9700042009353638, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.302, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9972260748959778, |
|
"eval_accuracy": 0.9474395717298437, |
|
"eval_f1": 0.6529640848117698, |
|
"eval_loss": 0.2320590764284134, |
|
"eval_precision": 0.6111786148238153, |
|
"eval_recall": 0.7008824895494659, |
|
"eval_runtime": 35.1938, |
|
"eval_samples_per_second": 5.654, |
|
"eval_steps_per_second": 1.421, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.163661581137309, |
|
"grad_norm": 1.2119841575622559, |
|
"learning_rate": 9.350000000000002e-06, |
|
"loss": 0.2655, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.163661581137309, |
|
"eval_accuracy": 0.9519818309630671, |
|
"eval_f1": 0.6844638949671772, |
|
"eval_loss": 0.20933493971824646, |
|
"eval_precision": 0.6470831609433182, |
|
"eval_recall": 0.7264282396655829, |
|
"eval_runtime": 35.2683, |
|
"eval_samples_per_second": 5.642, |
|
"eval_steps_per_second": 1.418, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.3300970873786406, |
|
"grad_norm": 2.050490617752075, |
|
"learning_rate": 9.3e-06, |
|
"loss": 0.2598, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.3300970873786406, |
|
"eval_accuracy": 0.9570107608284215, |
|
"eval_f1": 0.7274759669125868, |
|
"eval_loss": 0.1951305866241455, |
|
"eval_precision": 0.7012931034482759, |
|
"eval_recall": 0.7556897352531352, |
|
"eval_runtime": 35.2296, |
|
"eval_samples_per_second": 5.649, |
|
"eval_steps_per_second": 1.419, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.496532593619972, |
|
"grad_norm": 2.1060705184936523, |
|
"learning_rate": 9.250000000000001e-06, |
|
"loss": 0.2364, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.496532593619972, |
|
"eval_accuracy": 0.9590115178716271, |
|
"eval_f1": 0.7402309058614565, |
|
"eval_loss": 0.17936836183071136, |
|
"eval_precision": 0.7090599744789451, |
|
"eval_recall": 0.7742684626103112, |
|
"eval_runtime": 35.4911, |
|
"eval_samples_per_second": 5.607, |
|
"eval_steps_per_second": 1.409, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.662968099861304, |
|
"grad_norm": 1.8435375690460205, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.2218, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.662968099861304, |
|
"eval_accuracy": 0.9621478397231384, |
|
"eval_f1": 0.7557522123893805, |
|
"eval_loss": 0.1675911545753479, |
|
"eval_precision": 0.721588508660752, |
|
"eval_recall": 0.7933116581514166, |
|
"eval_runtime": 35.3833, |
|
"eval_samples_per_second": 5.624, |
|
"eval_steps_per_second": 1.413, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.8294036061026353, |
|
"grad_norm": 2.065732479095459, |
|
"learning_rate": 9.15e-06, |
|
"loss": 0.206, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.8294036061026353, |
|
"eval_accuracy": 0.9650137890012437, |
|
"eval_f1": 0.7758275938680294, |
|
"eval_loss": 0.15723256766796112, |
|
"eval_precision": 0.7436115843270868, |
|
"eval_recall": 0.8109614491407339, |
|
"eval_runtime": 35.4693, |
|
"eval_samples_per_second": 5.61, |
|
"eval_steps_per_second": 1.41, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.9958391123439667, |
|
"grad_norm": 2.1758480072021484, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 0.2053, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.9958391123439667, |
|
"eval_accuracy": 0.9640404477369816, |
|
"eval_f1": 0.7730088495575222, |
|
"eval_loss": 0.15795043110847473, |
|
"eval_precision": 0.7380650612589776, |
|
"eval_recall": 0.8114259173246633, |
|
"eval_runtime": 35.4002, |
|
"eval_samples_per_second": 5.621, |
|
"eval_steps_per_second": 1.412, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.162274618585298, |
|
"grad_norm": 1.6404600143432617, |
|
"learning_rate": 9.050000000000001e-06, |
|
"loss": 0.1876, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.162274618585298, |
|
"eval_accuracy": 0.9687449305142486, |
|
"eval_f1": 0.801343784994401, |
|
"eval_loss": 0.1406078040599823, |
|
"eval_precision": 0.7737889273356401, |
|
"eval_recall": 0.8309335810496981, |
|
"eval_runtime": 35.6674, |
|
"eval_samples_per_second": 5.579, |
|
"eval_steps_per_second": 1.402, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.3287101248266295, |
|
"grad_norm": 1.8430469036102295, |
|
"learning_rate": 9e-06, |
|
"loss": 0.1602, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.3287101248266295, |
|
"eval_accuracy": 0.9670686205591305, |
|
"eval_f1": 0.7985659870042572, |
|
"eval_loss": 0.14204147458076477, |
|
"eval_precision": 0.7714285714285715, |
|
"eval_recall": 0.8276823037621923, |
|
"eval_runtime": 35.5741, |
|
"eval_samples_per_second": 5.594, |
|
"eval_steps_per_second": 1.406, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.4951456310679614, |
|
"grad_norm": 2.2237956523895264, |
|
"learning_rate": 8.95e-06, |
|
"loss": 0.1706, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 3.4951456310679614, |
|
"eval_accuracy": 0.969069377602336, |
|
"eval_f1": 0.8149988705669754, |
|
"eval_loss": 0.13229934871196747, |
|
"eval_precision": 0.793315743183817, |
|
"eval_recall": 0.8379006038086391, |
|
"eval_runtime": 36.0972, |
|
"eval_samples_per_second": 5.513, |
|
"eval_steps_per_second": 1.385, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 3.661581137309293, |
|
"grad_norm": 2.04622220993042, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.1585, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.661581137309293, |
|
"eval_accuracy": 0.9700427188665982, |
|
"eval_f1": 0.8298399819697994, |
|
"eval_loss": 0.13131560385227203, |
|
"eval_precision": 0.8060420315236427, |
|
"eval_recall": 0.8550859266140269, |
|
"eval_runtime": 35.6467, |
|
"eval_samples_per_second": 5.583, |
|
"eval_steps_per_second": 1.403, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.828016643550624, |
|
"grad_norm": 2.0790255069732666, |
|
"learning_rate": 8.85e-06, |
|
"loss": 0.1574, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.828016643550624, |
|
"eval_accuracy": 0.9717190288217163, |
|
"eval_f1": 0.8376491781130375, |
|
"eval_loss": 0.12674090266227722, |
|
"eval_precision": 0.8129370629370629, |
|
"eval_recall": 0.8639108221086855, |
|
"eval_runtime": 35.5835, |
|
"eval_samples_per_second": 5.592, |
|
"eval_steps_per_second": 1.405, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.9944521497919556, |
|
"grad_norm": 2.3372180461883545, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.15, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.9944521497919556, |
|
"eval_accuracy": 0.97539609582004, |
|
"eval_f1": 0.8535811423390752, |
|
"eval_loss": 0.11569273471832275, |
|
"eval_precision": 0.8335546702080566, |
|
"eval_recall": 0.8745935903390618, |
|
"eval_runtime": 35.7391, |
|
"eval_samples_per_second": 5.568, |
|
"eval_steps_per_second": 1.399, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.160887656033287, |
|
"grad_norm": 2.763075828552246, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.1192, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 4.160887656033287, |
|
"eval_accuracy": 0.9740983074676904, |
|
"eval_f1": 0.8524664696521937, |
|
"eval_loss": 0.11200679838657379, |
|
"eval_precision": 0.8348174532502226, |
|
"eval_recall": 0.8708778448676265, |
|
"eval_runtime": 36.0008, |
|
"eval_samples_per_second": 5.528, |
|
"eval_steps_per_second": 1.389, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 4.327323162274618, |
|
"grad_norm": 1.7937551736831665, |
|
"learning_rate": 8.700000000000001e-06, |
|
"loss": 0.1313, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.327323162274618, |
|
"eval_accuracy": 0.9745309035851403, |
|
"eval_f1": 0.8588929219600727, |
|
"eval_loss": 0.1129654049873352, |
|
"eval_precision": 0.8394678492239468, |
|
"eval_recall": 0.8792382721783558, |
|
"eval_runtime": 36.0013, |
|
"eval_samples_per_second": 5.528, |
|
"eval_steps_per_second": 1.389, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.49375866851595, |
|
"grad_norm": 1.4142848253250122, |
|
"learning_rate": 8.65e-06, |
|
"loss": 0.1179, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 4.49375866851595, |
|
"eval_accuracy": 0.9755042448494025, |
|
"eval_f1": 0.8613303269447576, |
|
"eval_loss": 0.109279565513134, |
|
"eval_precision": 0.8369851007887817, |
|
"eval_recall": 0.8871342313051556, |
|
"eval_runtime": 36.0609, |
|
"eval_samples_per_second": 5.518, |
|
"eval_steps_per_second": 1.387, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 4.660194174757281, |
|
"grad_norm": 1.6794809103012085, |
|
"learning_rate": 8.6e-06, |
|
"loss": 0.1327, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.660194174757281, |
|
"eval_accuracy": 0.9745849780998216, |
|
"eval_f1": 0.862053369516056, |
|
"eval_loss": 0.11022669076919556, |
|
"eval_precision": 0.8400176289114147, |
|
"eval_recall": 0.885276358569438, |
|
"eval_runtime": 36.0549, |
|
"eval_samples_per_second": 5.519, |
|
"eval_steps_per_second": 1.387, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.826629680998613, |
|
"grad_norm": 1.8358403444290161, |
|
"learning_rate": 8.550000000000001e-06, |
|
"loss": 0.1323, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 4.826629680998613, |
|
"eval_accuracy": 0.978207970583464, |
|
"eval_f1": 0.8795454545454546, |
|
"eval_loss": 0.09974753856658936, |
|
"eval_precision": 0.8611481975967957, |
|
"eval_recall": 0.8987459359033906, |
|
"eval_runtime": 36.3053, |
|
"eval_samples_per_second": 5.481, |
|
"eval_steps_per_second": 1.377, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 4.993065187239944, |
|
"grad_norm": 2.1321513652801514, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.1254, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.993065187239944, |
|
"eval_accuracy": 0.9774509273779268, |
|
"eval_f1": 0.8727683615819208, |
|
"eval_loss": 0.094924695789814, |
|
"eval_precision": 0.8499119718309859, |
|
"eval_recall": 0.896888063167673, |
|
"eval_runtime": 36.229, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.38, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.159500693481276, |
|
"grad_norm": 1.3562971353530884, |
|
"learning_rate": 8.45e-06, |
|
"loss": 0.0999, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 5.159500693481276, |
|
"eval_accuracy": 0.9797220569945385, |
|
"eval_f1": 0.8822055137844612, |
|
"eval_loss": 0.08469922095537186, |
|
"eval_precision": 0.8658318425760286, |
|
"eval_recall": 0.89921040408732, |
|
"eval_runtime": 36.2946, |
|
"eval_samples_per_second": 5.483, |
|
"eval_steps_per_second": 1.378, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 5.325936199722608, |
|
"grad_norm": 1.683296799659729, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.1017, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.325936199722608, |
|
"eval_accuracy": 0.981019845346888, |
|
"eval_f1": 0.8923777019340159, |
|
"eval_loss": 0.08026640117168427, |
|
"eval_precision": 0.8746654772524531, |
|
"eval_recall": 0.910822108685555, |
|
"eval_runtime": 36.4129, |
|
"eval_samples_per_second": 5.465, |
|
"eval_steps_per_second": 1.373, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.492371705963939, |
|
"grad_norm": 2.303062677383423, |
|
"learning_rate": 8.35e-06, |
|
"loss": 0.091, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 5.492371705963939, |
|
"eval_accuracy": 0.9805872492294382, |
|
"eval_f1": 0.8918362680082322, |
|
"eval_loss": 0.07959215342998505, |
|
"eval_precision": 0.8783783783783784, |
|
"eval_recall": 0.9057129586623316, |
|
"eval_runtime": 36.4104, |
|
"eval_samples_per_second": 5.465, |
|
"eval_steps_per_second": 1.373, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 5.658807212205271, |
|
"grad_norm": 3.383983850479126, |
|
"learning_rate": 8.3e-06, |
|
"loss": 0.0979, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.658807212205271, |
|
"eval_accuracy": 0.9773427783485643, |
|
"eval_f1": 0.8775045537340619, |
|
"eval_loss": 0.09432032704353333, |
|
"eval_precision": 0.8606520768200089, |
|
"eval_recall": 0.8950301904319554, |
|
"eval_runtime": 36.0505, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 1.387, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.825242718446602, |
|
"grad_norm": 2.1892480850219727, |
|
"learning_rate": 8.25e-06, |
|
"loss": 0.1024, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 5.825242718446602, |
|
"eval_accuracy": 0.980533174714757, |
|
"eval_f1": 0.8882312770316413, |
|
"eval_loss": 0.08036847412586212, |
|
"eval_precision": 0.8709821428571428, |
|
"eval_recall": 0.906177426846261, |
|
"eval_runtime": 36.1406, |
|
"eval_samples_per_second": 5.506, |
|
"eval_steps_per_second": 1.383, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 5.991678224687933, |
|
"grad_norm": 1.8490287065505981, |
|
"learning_rate": 8.2e-06, |
|
"loss": 0.0952, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.991678224687933, |
|
"eval_accuracy": 0.9816146650083816, |
|
"eval_f1": 0.900843400957374, |
|
"eval_loss": 0.07866356521844864, |
|
"eval_precision": 0.8845120859444942, |
|
"eval_recall": 0.917789131444496, |
|
"eval_runtime": 36.4974, |
|
"eval_samples_per_second": 5.452, |
|
"eval_steps_per_second": 1.37, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.158113730929265, |
|
"grad_norm": 3.0108256340026855, |
|
"learning_rate": 8.15e-06, |
|
"loss": 0.0742, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 6.158113730929265, |
|
"eval_accuracy": 0.9823176336992375, |
|
"eval_f1": 0.9032553874369554, |
|
"eval_loss": 0.07755902409553528, |
|
"eval_precision": 0.8918062471706655, |
|
"eval_recall": 0.9150023223409196, |
|
"eval_runtime": 36.3184, |
|
"eval_samples_per_second": 5.479, |
|
"eval_steps_per_second": 1.377, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 6.324549237170596, |
|
"grad_norm": 2.533155679702759, |
|
"learning_rate": 8.1e-06, |
|
"loss": 0.0764, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.324549237170596, |
|
"eval_accuracy": 0.9837235710809495, |
|
"eval_f1": 0.9106813996316758, |
|
"eval_loss": 0.07210895419120789, |
|
"eval_precision": 0.9027841168416249, |
|
"eval_recall": 0.9187180678123549, |
|
"eval_runtime": 36.5311, |
|
"eval_samples_per_second": 5.447, |
|
"eval_steps_per_second": 1.369, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.490984743411928, |
|
"grad_norm": 1.943320631980896, |
|
"learning_rate": 8.050000000000001e-06, |
|
"loss": 0.0813, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 6.490984743411928, |
|
"eval_accuracy": 0.9844265397718055, |
|
"eval_f1": 0.914614499424626, |
|
"eval_loss": 0.06643209606409073, |
|
"eval_precision": 0.906478102189781, |
|
"eval_recall": 0.9228982814677195, |
|
"eval_runtime": 36.3904, |
|
"eval_samples_per_second": 5.468, |
|
"eval_steps_per_second": 1.374, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 6.657420249653259, |
|
"grad_norm": 1.322831392288208, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0791, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.657420249653259, |
|
"eval_accuracy": 0.9848050613745741, |
|
"eval_f1": 0.9137614678899082, |
|
"eval_loss": 0.06415116786956787, |
|
"eval_precision": 0.902582691436339, |
|
"eval_recall": 0.9252206223873665, |
|
"eval_runtime": 36.27, |
|
"eval_samples_per_second": 5.487, |
|
"eval_steps_per_second": 1.379, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.8238557558945905, |
|
"grad_norm": 1.5891202688217163, |
|
"learning_rate": 7.950000000000002e-06, |
|
"loss": 0.0792, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 6.8238557558945905, |
|
"eval_accuracy": 0.9841020926837182, |
|
"eval_f1": 0.9103795153177869, |
|
"eval_loss": 0.06728328764438629, |
|
"eval_precision": 0.8964430436740207, |
|
"eval_recall": 0.924756154203437, |
|
"eval_runtime": 35.8847, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.393, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 6.990291262135923, |
|
"grad_norm": 3.470646858215332, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.078, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.990291262135923, |
|
"eval_accuracy": 0.9832909749634997, |
|
"eval_f1": 0.9078857142857143, |
|
"eval_loss": 0.06933122873306274, |
|
"eval_precision": 0.8937893789378938, |
|
"eval_recall": 0.92243381328379, |
|
"eval_runtime": 36.1839, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 1.382, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.156726768377254, |
|
"grad_norm": 2.4168286323547363, |
|
"learning_rate": 7.850000000000001e-06, |
|
"loss": 0.0678, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 7.156726768377254, |
|
"eval_accuracy": 0.985237657492024, |
|
"eval_f1": 0.92025664527956, |
|
"eval_loss": 0.06722652167081833, |
|
"eval_precision": 0.9081863410221619, |
|
"eval_recall": 0.9326521133302369, |
|
"eval_runtime": 36.716, |
|
"eval_samples_per_second": 5.42, |
|
"eval_steps_per_second": 1.362, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 7.323162274618586, |
|
"grad_norm": 1.048614501953125, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 0.0685, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.323162274618586, |
|
"eval_accuracy": 0.9839939436543557, |
|
"eval_f1": 0.9072635906806761, |
|
"eval_loss": 0.06548429280519485, |
|
"eval_precision": 0.8925842696629214, |
|
"eval_recall": 0.92243381328379, |
|
"eval_runtime": 36.897, |
|
"eval_samples_per_second": 5.393, |
|
"eval_steps_per_second": 1.355, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.489597780859917, |
|
"grad_norm": 2.5844979286193848, |
|
"learning_rate": 7.75e-06, |
|
"loss": 0.0555, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 7.489597780859917, |
|
"eval_accuracy": 0.9856161790947926, |
|
"eval_f1": 0.9213016385875836, |
|
"eval_loss": 0.06148982420563698, |
|
"eval_precision": 0.9155963302752294, |
|
"eval_recall": 0.927078495123084, |
|
"eval_runtime": 36.1847, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 1.382, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 7.656033287101248, |
|
"grad_norm": 1.9488413333892822, |
|
"learning_rate": 7.7e-06, |
|
"loss": 0.07, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.656033287101248, |
|
"eval_accuracy": 0.9867517439030985, |
|
"eval_f1": 0.927176659774868, |
|
"eval_loss": 0.058708589524030685, |
|
"eval_precision": 0.9172727272727272, |
|
"eval_recall": 0.9372967951695309, |
|
"eval_runtime": 36.4405, |
|
"eval_samples_per_second": 5.461, |
|
"eval_steps_per_second": 1.372, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.82246879334258, |
|
"grad_norm": 1.7437242269515991, |
|
"learning_rate": 7.650000000000001e-06, |
|
"loss": 0.065, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 7.82246879334258, |
|
"eval_accuracy": 0.9874547125939545, |
|
"eval_f1": 0.9303928325292902, |
|
"eval_loss": 0.0557989701628685, |
|
"eval_precision": 0.9204545454545454, |
|
"eval_recall": 0.9405480724570366, |
|
"eval_runtime": 36.0661, |
|
"eval_samples_per_second": 5.518, |
|
"eval_steps_per_second": 1.386, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 7.988904299583911, |
|
"grad_norm": 1.0527422428131104, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.0599, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.988904299583911, |
|
"eval_accuracy": 0.9878332341967231, |
|
"eval_f1": 0.9342226310947562, |
|
"eval_loss": 0.05789622664451599, |
|
"eval_precision": 0.9252847380410023, |
|
"eval_recall": 0.9433348815606131, |
|
"eval_runtime": 36.3712, |
|
"eval_samples_per_second": 5.471, |
|
"eval_steps_per_second": 1.375, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.155339805825243, |
|
"grad_norm": 1.6904972791671753, |
|
"learning_rate": 7.5500000000000006e-06, |
|
"loss": 0.0571, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 8.155339805825243, |
|
"eval_accuracy": 0.9865895203590548, |
|
"eval_f1": 0.9238905495516211, |
|
"eval_loss": 0.059290919452905655, |
|
"eval_precision": 0.9148451730418944, |
|
"eval_recall": 0.9331165815141663, |
|
"eval_runtime": 36.0084, |
|
"eval_samples_per_second": 5.526, |
|
"eval_steps_per_second": 1.389, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 8.321775312066574, |
|
"grad_norm": 1.9831328392028809, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0563, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.321775312066574, |
|
"eval_accuracy": 0.9863191477856487, |
|
"eval_f1": 0.9236079153244362, |
|
"eval_loss": 0.06046581640839577, |
|
"eval_precision": 0.9151846785225718, |
|
"eval_recall": 0.9321876451463075, |
|
"eval_runtime": 36.0693, |
|
"eval_samples_per_second": 5.517, |
|
"eval_steps_per_second": 1.386, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.488210818307905, |
|
"grad_norm": 2.0379467010498047, |
|
"learning_rate": 7.450000000000001e-06, |
|
"loss": 0.0602, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 8.488210818307905, |
|
"eval_accuracy": 0.9863191477856487, |
|
"eval_f1": 0.927992590877518, |
|
"eval_loss": 0.058113399893045425, |
|
"eval_precision": 0.925207756232687, |
|
"eval_recall": 0.9307942405945193, |
|
"eval_runtime": 35.9178, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 1.392, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 8.654646324549237, |
|
"grad_norm": 3.095200538635254, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.0582, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.654646324549237, |
|
"eval_accuracy": 0.9872384145352295, |
|
"eval_f1": 0.9288837744533948, |
|
"eval_loss": 0.05814095214009285, |
|
"eval_precision": 0.9206204379562044, |
|
"eval_recall": 0.9372967951695309, |
|
"eval_runtime": 36.2273, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.38, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.821081830790568, |
|
"grad_norm": 1.0786473751068115, |
|
"learning_rate": 7.350000000000001e-06, |
|
"loss": 0.0514, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 8.821081830790568, |
|
"eval_accuracy": 0.9872924890499107, |
|
"eval_f1": 0.9313047487321346, |
|
"eval_loss": 0.055727362632751465, |
|
"eval_precision": 0.9244851258581236, |
|
"eval_recall": 0.9382257315373896, |
|
"eval_runtime": 36.0241, |
|
"eval_samples_per_second": 5.524, |
|
"eval_steps_per_second": 1.388, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 8.9875173370319, |
|
"grad_norm": 1.6077920198440552, |
|
"learning_rate": 7.3e-06, |
|
"loss": 0.0467, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.9875173370319, |
|
"eval_accuracy": 0.9883199048288541, |
|
"eval_f1": 0.9393661001378043, |
|
"eval_loss": 0.05200658738613129, |
|
"eval_precision": 0.9291231258518855, |
|
"eval_recall": 0.9498374361356247, |
|
"eval_runtime": 35.9411, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 1.391, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 9.153952843273231, |
|
"grad_norm": 1.601219892501831, |
|
"learning_rate": 7.25e-06, |
|
"loss": 0.0435, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 9.153952843273231, |
|
"eval_accuracy": 0.9879954577407668, |
|
"eval_f1": 0.9336699563920129, |
|
"eval_loss": 0.05260741710662842, |
|
"eval_precision": 0.9228675136116152, |
|
"eval_recall": 0.9447282861124013, |
|
"eval_runtime": 35.7996, |
|
"eval_samples_per_second": 5.559, |
|
"eval_steps_per_second": 1.397, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 9.320388349514563, |
|
"grad_norm": 0.7272451519966125, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.0531, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.320388349514563, |
|
"eval_accuracy": 0.9883739793435354, |
|
"eval_f1": 0.9344978165938865, |
|
"eval_loss": 0.05022520199418068, |
|
"eval_precision": 0.9249317561419472, |
|
"eval_recall": 0.9442638179284719, |
|
"eval_runtime": 36.0285, |
|
"eval_samples_per_second": 5.523, |
|
"eval_steps_per_second": 1.388, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.486823855755894, |
|
"grad_norm": 0.9556881189346313, |
|
"learning_rate": 7.15e-06, |
|
"loss": 0.0502, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 9.486823855755894, |
|
"eval_accuracy": 0.9874006380792733, |
|
"eval_f1": 0.9309240622140896, |
|
"eval_loss": 0.05446859449148178, |
|
"eval_precision": 0.9170797656602073, |
|
"eval_recall": 0.9451927542963307, |
|
"eval_runtime": 36.0609, |
|
"eval_samples_per_second": 5.518, |
|
"eval_steps_per_second": 1.387, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 9.653259361997225, |
|
"grad_norm": 1.0404924154281616, |
|
"learning_rate": 7.100000000000001e-06, |
|
"loss": 0.0377, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.653259361997225, |
|
"eval_accuracy": 0.9850754339479804, |
|
"eval_f1": 0.9220571428571429, |
|
"eval_loss": 0.06175297126173973, |
|
"eval_precision": 0.9077407740774077, |
|
"eval_recall": 0.9368323269856015, |
|
"eval_runtime": 36.326, |
|
"eval_samples_per_second": 5.478, |
|
"eval_steps_per_second": 1.376, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.819694868238557, |
|
"grad_norm": 1.1249316930770874, |
|
"learning_rate": 7.05e-06, |
|
"loss": 0.0416, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 9.819694868238557, |
|
"eval_accuracy": 0.9881036067701292, |
|
"eval_f1": 0.9328719723183392, |
|
"eval_loss": 0.05493583530187607, |
|
"eval_precision": 0.9266727772685609, |
|
"eval_recall": 0.9391546679052485, |
|
"eval_runtime": 36.1852, |
|
"eval_samples_per_second": 5.499, |
|
"eval_steps_per_second": 1.382, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 9.986130374479888, |
|
"grad_norm": 1.0846829414367676, |
|
"learning_rate": 7e-06, |
|
"loss": 0.044, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.986130374479888, |
|
"eval_accuracy": 0.9884280538582166, |
|
"eval_f1": 0.9420457169244978, |
|
"eval_loss": 0.05289188027381897, |
|
"eval_precision": 0.9366391184573003, |
|
"eval_recall": 0.9475150952159777, |
|
"eval_runtime": 36.0505, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 1.387, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.152565880721221, |
|
"grad_norm": 0.8957504630088806, |
|
"learning_rate": 6.95e-06, |
|
"loss": 0.0383, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 10.152565880721221, |
|
"eval_accuracy": 0.9889147244903477, |
|
"eval_f1": 0.9403088269186448, |
|
"eval_loss": 0.048978183418512344, |
|
"eval_precision": 0.9332113449222323, |
|
"eval_recall": 0.9475150952159777, |
|
"eval_runtime": 36.1551, |
|
"eval_samples_per_second": 5.504, |
|
"eval_steps_per_second": 1.383, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 10.319001386962553, |
|
"grad_norm": 1.6940028667449951, |
|
"learning_rate": 6.9e-06, |
|
"loss": 0.0454, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.319001386962553, |
|
"eval_accuracy": 0.988536202887579, |
|
"eval_f1": 0.9366100137804317, |
|
"eval_loss": 0.05073446407914162, |
|
"eval_precision": 0.9263970922308041, |
|
"eval_recall": 0.9470506270320483, |
|
"eval_runtime": 36.1642, |
|
"eval_samples_per_second": 5.503, |
|
"eval_steps_per_second": 1.383, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.485436893203884, |
|
"grad_norm": 0.9225968718528748, |
|
"learning_rate": 6.850000000000001e-06, |
|
"loss": 0.0416, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 10.485436893203884, |
|
"eval_accuracy": 0.9891310225490726, |
|
"eval_f1": 0.9430481899930827, |
|
"eval_loss": 0.046711865812540054, |
|
"eval_precision": 0.9363553113553114, |
|
"eval_recall": 0.9498374361356247, |
|
"eval_runtime": 36.5741, |
|
"eval_samples_per_second": 5.441, |
|
"eval_steps_per_second": 1.367, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 10.651872399445216, |
|
"grad_norm": 2.7210068702697754, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.0403, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.651872399445216, |
|
"eval_accuracy": 0.9886443519169416, |
|
"eval_f1": 0.9384650841207652, |
|
"eval_loss": 0.04987097531557083, |
|
"eval_precision": 0.9313815187557182, |
|
"eval_recall": 0.9456572224802601, |
|
"eval_runtime": 36.5567, |
|
"eval_samples_per_second": 5.444, |
|
"eval_steps_per_second": 1.368, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.818307905686547, |
|
"grad_norm": 1.160333275794983, |
|
"learning_rate": 6.750000000000001e-06, |
|
"loss": 0.0354, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 10.818307905686547, |
|
"eval_accuracy": 0.9882658303141729, |
|
"eval_f1": 0.9354171454837968, |
|
"eval_loss": 0.05233873799443245, |
|
"eval_precision": 0.9258416742493175, |
|
"eval_recall": 0.9451927542963307, |
|
"eval_runtime": 36.44, |
|
"eval_samples_per_second": 5.461, |
|
"eval_steps_per_second": 1.372, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 10.984743411927878, |
|
"grad_norm": 0.8807191848754883, |
|
"learning_rate": 6.700000000000001e-06, |
|
"loss": 0.0338, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 10.984743411927878, |
|
"eval_accuracy": 0.9879954577407668, |
|
"eval_f1": 0.9318025258323767, |
|
"eval_loss": 0.052071038633584976, |
|
"eval_precision": 0.9214350590372389, |
|
"eval_recall": 0.9424059451927543, |
|
"eval_runtime": 36.6322, |
|
"eval_samples_per_second": 5.432, |
|
"eval_steps_per_second": 1.365, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 11.15117891816921, |
|
"grad_norm": 1.1557176113128662, |
|
"learning_rate": 6.650000000000001e-06, |
|
"loss": 0.0347, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 11.15117891816921, |
|
"eval_accuracy": 0.988049532255448, |
|
"eval_f1": 0.9353507565337001, |
|
"eval_loss": 0.053912434726953506, |
|
"eval_precision": 0.9234947940244455, |
|
"eval_recall": 0.9475150952159777, |
|
"eval_runtime": 36.5986, |
|
"eval_samples_per_second": 5.437, |
|
"eval_steps_per_second": 1.366, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 11.317614424410541, |
|
"grad_norm": 1.668484091758728, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.0364, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.317614424410541, |
|
"eval_accuracy": 0.9870761909911858, |
|
"eval_f1": 0.9334552938486165, |
|
"eval_loss": 0.055973075330257416, |
|
"eval_precision": 0.9193693693693694, |
|
"eval_recall": 0.9479795633999071, |
|
"eval_runtime": 36.6625, |
|
"eval_samples_per_second": 5.428, |
|
"eval_steps_per_second": 1.364, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.484049930651873, |
|
"grad_norm": 2.5720293521881104, |
|
"learning_rate": 6.550000000000001e-06, |
|
"loss": 0.0363, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 11.484049930651873, |
|
"eval_accuracy": 0.9889147244903477, |
|
"eval_f1": 0.9381751321535279, |
|
"eval_loss": 0.050925422459840775, |
|
"eval_precision": 0.9285714285714286, |
|
"eval_recall": 0.9479795633999071, |
|
"eval_runtime": 36.4069, |
|
"eval_samples_per_second": 5.466, |
|
"eval_steps_per_second": 1.373, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 11.650485436893204, |
|
"grad_norm": 2.5676207542419434, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.0308, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 11.650485436893204, |
|
"eval_accuracy": 0.9893473206077975, |
|
"eval_f1": 0.94362292051756, |
|
"eval_loss": 0.04982053115963936, |
|
"eval_precision": 0.9388505747126437, |
|
"eval_recall": 0.9484440315838365, |
|
"eval_runtime": 36.3679, |
|
"eval_samples_per_second": 5.472, |
|
"eval_steps_per_second": 1.375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 11.816920943134535, |
|
"grad_norm": 0.9586185812950134, |
|
"learning_rate": 6.450000000000001e-06, |
|
"loss": 0.032, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 11.816920943134535, |
|
"eval_accuracy": 0.9891310225490726, |
|
"eval_f1": 0.9403330249768733, |
|
"eval_loss": 0.04908496141433716, |
|
"eval_precision": 0.9364348226623675, |
|
"eval_recall": 0.9442638179284719, |
|
"eval_runtime": 35.9979, |
|
"eval_samples_per_second": 5.528, |
|
"eval_steps_per_second": 1.389, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 11.983356449375867, |
|
"grad_norm": 1.067063331604004, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.0331, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.983356449375867, |
|
"eval_accuracy": 0.9891850970637539, |
|
"eval_f1": 0.940768162887552, |
|
"eval_loss": 0.0454898327589035, |
|
"eval_precision": 0.9372982941447672, |
|
"eval_recall": 0.9442638179284719, |
|
"eval_runtime": 36.1674, |
|
"eval_samples_per_second": 5.502, |
|
"eval_steps_per_second": 1.382, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 12.149791955617198, |
|
"grad_norm": 1.4905815124511719, |
|
"learning_rate": 6.35e-06, |
|
"loss": 0.0301, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 12.149791955617198, |
|
"eval_accuracy": 0.9891850970637539, |
|
"eval_f1": 0.9423431734317342, |
|
"eval_loss": 0.04859260097146034, |
|
"eval_precision": 0.9358680714612918, |
|
"eval_recall": 0.9489084997677659, |
|
"eval_runtime": 36.286, |
|
"eval_samples_per_second": 5.484, |
|
"eval_steps_per_second": 1.378, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 12.31622746185853, |
|
"grad_norm": 1.3888496160507202, |
|
"learning_rate": 6.300000000000001e-06, |
|
"loss": 0.0308, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.31622746185853, |
|
"eval_accuracy": 0.9891310225490726, |
|
"eval_f1": 0.9413388543823326, |
|
"eval_loss": 0.051349248737096786, |
|
"eval_precision": 0.9325432999088423, |
|
"eval_recall": 0.9503019043195541, |
|
"eval_runtime": 36.2143, |
|
"eval_samples_per_second": 5.495, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.482662968099861, |
|
"grad_norm": 0.5457278490066528, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0253, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 12.482662968099861, |
|
"eval_accuracy": 0.9891850970637539, |
|
"eval_f1": 0.939825447864033, |
|
"eval_loss": 0.05103699862957001, |
|
"eval_precision": 0.9295774647887324, |
|
"eval_recall": 0.9503019043195541, |
|
"eval_runtime": 36.4491, |
|
"eval_samples_per_second": 5.46, |
|
"eval_steps_per_second": 1.372, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 12.649098474341192, |
|
"grad_norm": 1.106314778327942, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 0.0301, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.649098474341192, |
|
"eval_accuracy": 0.9886443519169416, |
|
"eval_f1": 0.9397424103035878, |
|
"eval_loss": 0.053277622908353806, |
|
"eval_precision": 0.9307517084282461, |
|
"eval_recall": 0.9489084997677659, |
|
"eval_runtime": 36.4299, |
|
"eval_samples_per_second": 5.463, |
|
"eval_steps_per_second": 1.372, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.815533980582524, |
|
"grad_norm": 0.9172839522361755, |
|
"learning_rate": 6.15e-06, |
|
"loss": 0.0328, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 12.815533980582524, |
|
"eval_accuracy": 0.9884821283728978, |
|
"eval_f1": 0.9364348226623675, |
|
"eval_loss": 0.0548846460878849, |
|
"eval_precision": 0.9287345820009136, |
|
"eval_recall": 0.9442638179284719, |
|
"eval_runtime": 36.3929, |
|
"eval_samples_per_second": 5.468, |
|
"eval_steps_per_second": 1.374, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 12.981969486823855, |
|
"grad_norm": 1.9091347455978394, |
|
"learning_rate": 6.1e-06, |
|
"loss": 0.0298, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 12.981969486823855, |
|
"eval_accuracy": 0.98945546963716, |
|
"eval_f1": 0.9450092421441775, |
|
"eval_loss": 0.05042650178074837, |
|
"eval_precision": 0.9402298850574713, |
|
"eval_recall": 0.9498374361356247, |
|
"eval_runtime": 35.8371, |
|
"eval_samples_per_second": 5.553, |
|
"eval_steps_per_second": 1.395, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 13.148404993065187, |
|
"grad_norm": 1.2674860954284668, |
|
"learning_rate": 6.0500000000000005e-06, |
|
"loss": 0.0256, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 13.148404993065187, |
|
"eval_accuracy": 0.988752500946304, |
|
"eval_f1": 0.9386716037954178, |
|
"eval_loss": 0.051467474550008774, |
|
"eval_precision": 0.9354243542435424, |
|
"eval_recall": 0.9419414770088249, |
|
"eval_runtime": 36.0333, |
|
"eval_samples_per_second": 5.523, |
|
"eval_steps_per_second": 1.388, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 13.314840499306518, |
|
"grad_norm": 1.406807780265808, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0313, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.314840499306518, |
|
"eval_accuracy": 0.9905369599307846, |
|
"eval_f1": 0.9480968858131489, |
|
"eval_loss": 0.048274096101522446, |
|
"eval_precision": 0.9417965169569202, |
|
"eval_recall": 0.9544821179749187, |
|
"eval_runtime": 35.8422, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 1.395, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.48127600554785, |
|
"grad_norm": 0.5426374673843384, |
|
"learning_rate": 5.950000000000001e-06, |
|
"loss": 0.022, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 13.48127600554785, |
|
"eval_accuracy": 0.9898880657546099, |
|
"eval_f1": 0.9445339470655927, |
|
"eval_loss": 0.0463298000395298, |
|
"eval_precision": 0.9361313868613139, |
|
"eval_recall": 0.9530887134231305, |
|
"eval_runtime": 36.2558, |
|
"eval_samples_per_second": 5.489, |
|
"eval_steps_per_second": 1.379, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 13.647711511789181, |
|
"grad_norm": 2.050182342529297, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.0245, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 13.647711511789181, |
|
"eval_accuracy": 0.9893473206077975, |
|
"eval_f1": 0.9430219146482123, |
|
"eval_loss": 0.04942420497536659, |
|
"eval_precision": 0.9367552703941339, |
|
"eval_recall": 0.9493729679516953, |
|
"eval_runtime": 36.4711, |
|
"eval_samples_per_second": 5.456, |
|
"eval_steps_per_second": 1.371, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 13.814147018030512, |
|
"grad_norm": 1.7617555856704712, |
|
"learning_rate": 5.85e-06, |
|
"loss": 0.0251, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 13.814147018030512, |
|
"eval_accuracy": 0.9897799167252473, |
|
"eval_f1": 0.9467128027681662, |
|
"eval_loss": 0.049306854605674744, |
|
"eval_precision": 0.9404216315307058, |
|
"eval_recall": 0.9530887134231305, |
|
"eval_runtime": 36.1814, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 1.382, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 13.980582524271846, |
|
"grad_norm": 1.183014154434204, |
|
"learning_rate": 5.8e-06, |
|
"loss": 0.0259, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 13.980582524271846, |
|
"eval_accuracy": 0.98945546963716, |
|
"eval_f1": 0.9453539312889093, |
|
"eval_loss": 0.05114530399441719, |
|
"eval_precision": 0.9386446886446886, |
|
"eval_recall": 0.9521597770552717, |
|
"eval_runtime": 36.0831, |
|
"eval_samples_per_second": 5.515, |
|
"eval_steps_per_second": 1.386, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 14.147018030513177, |
|
"grad_norm": 0.6956959962844849, |
|
"learning_rate": 5.75e-06, |
|
"loss": 0.03, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 14.147018030513177, |
|
"eval_accuracy": 0.9888606499756665, |
|
"eval_f1": 0.9399815327793166, |
|
"eval_loss": 0.053482603281736374, |
|
"eval_precision": 0.9343735658558971, |
|
"eval_recall": 0.9456572224802601, |
|
"eval_runtime": 35.8745, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 1.394, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 14.313453536754508, |
|
"grad_norm": 1.2064058780670166, |
|
"learning_rate": 5.7e-06, |
|
"loss": 0.0192, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.313453536754508, |
|
"eval_accuracy": 0.9898880657546099, |
|
"eval_f1": 0.9460772969220087, |
|
"eval_loss": 0.049094799906015396, |
|
"eval_precision": 0.9428044280442804, |
|
"eval_recall": 0.9493729679516953, |
|
"eval_runtime": 35.7923, |
|
"eval_samples_per_second": 5.56, |
|
"eval_steps_per_second": 1.397, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.47988904299584, |
|
"grad_norm": 1.727489948272705, |
|
"learning_rate": 5.65e-06, |
|
"loss": 0.0267, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 14.47988904299584, |
|
"eval_accuracy": 0.9901043638133348, |
|
"eval_f1": 0.9500693481276006, |
|
"eval_loss": 0.04895344376564026, |
|
"eval_precision": 0.9456971928209849, |
|
"eval_recall": 0.9544821179749187, |
|
"eval_runtime": 36.349, |
|
"eval_samples_per_second": 5.475, |
|
"eval_steps_per_second": 1.376, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 14.646324549237171, |
|
"grad_norm": 0.6142871379852295, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.0241, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.646324549237171, |
|
"eval_accuracy": 0.9899421402692911, |
|
"eval_f1": 0.948729792147806, |
|
"eval_loss": 0.050602879375219345, |
|
"eval_precision": 0.9435002296738632, |
|
"eval_recall": 0.9540176497909894, |
|
"eval_runtime": 36.205, |
|
"eval_samples_per_second": 5.496, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.812760055478503, |
|
"grad_norm": 1.6362483501434326, |
|
"learning_rate": 5.550000000000001e-06, |
|
"loss": 0.0211, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 14.812760055478503, |
|
"eval_accuracy": 0.9903206618720597, |
|
"eval_f1": 0.9491682070240296, |
|
"eval_loss": 0.050954435020685196, |
|
"eval_precision": 0.944367816091954, |
|
"eval_recall": 0.9540176497909894, |
|
"eval_runtime": 36.469, |
|
"eval_samples_per_second": 5.457, |
|
"eval_steps_per_second": 1.371, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 14.979195561719834, |
|
"grad_norm": 0.9267581105232239, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.0171, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 14.979195561719834, |
|
"eval_accuracy": 0.9897799167252473, |
|
"eval_f1": 0.9474412171507607, |
|
"eval_loss": 0.04994847625494003, |
|
"eval_precision": 0.9405034324942791, |
|
"eval_recall": 0.9544821179749187, |
|
"eval_runtime": 36.7159, |
|
"eval_samples_per_second": 5.42, |
|
"eval_steps_per_second": 1.362, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 15.145631067961165, |
|
"grad_norm": 0.6142176389694214, |
|
"learning_rate": 5.450000000000001e-06, |
|
"loss": 0.0226, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 15.145631067961165, |
|
"eval_accuracy": 0.9894013951224788, |
|
"eval_f1": 0.9452369995398067, |
|
"eval_loss": 0.05113999918103218, |
|
"eval_precision": 0.9366165070679434, |
|
"eval_recall": 0.9540176497909894, |
|
"eval_runtime": 36.247, |
|
"eval_samples_per_second": 5.49, |
|
"eval_steps_per_second": 1.379, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 15.312066574202497, |
|
"grad_norm": 0.46341672539711, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.024, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.312066574202497, |
|
"eval_accuracy": 0.9899421402692911, |
|
"eval_f1": 0.9501385041551247, |
|
"eval_loss": 0.04835886508226395, |
|
"eval_precision": 0.9444699403396053, |
|
"eval_recall": 0.9558755225267069, |
|
"eval_runtime": 35.8678, |
|
"eval_samples_per_second": 5.548, |
|
"eval_steps_per_second": 1.394, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.478502080443828, |
|
"grad_norm": 1.446049690246582, |
|
"learning_rate": 5.3500000000000004e-06, |
|
"loss": 0.018, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 15.478502080443828, |
|
"eval_accuracy": 0.9903206618720597, |
|
"eval_f1": 0.9492703266157054, |
|
"eval_loss": 0.04823274910449982, |
|
"eval_precision": 0.9468576709796673, |
|
"eval_recall": 0.9516953088713423, |
|
"eval_runtime": 35.9765, |
|
"eval_samples_per_second": 5.531, |
|
"eval_steps_per_second": 1.39, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 15.64493758668516, |
|
"grad_norm": 0.7485630512237549, |
|
"learning_rate": 5.300000000000001e-06, |
|
"loss": 0.0191, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.64493758668516, |
|
"eval_accuracy": 0.9899421402692911, |
|
"eval_f1": 0.947709393799167, |
|
"eval_loss": 0.04913439229130745, |
|
"eval_precision": 0.9442139234670355, |
|
"eval_recall": 0.9512308406874129, |
|
"eval_runtime": 36.5589, |
|
"eval_samples_per_second": 5.443, |
|
"eval_steps_per_second": 1.368, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.811373092926491, |
|
"grad_norm": 0.8376514911651611, |
|
"learning_rate": 5.2500000000000006e-06, |
|
"loss": 0.0203, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 15.811373092926491, |
|
"eval_accuracy": 0.9912399286216407, |
|
"eval_f1": 0.9531974050046339, |
|
"eval_loss": 0.04510456323623657, |
|
"eval_precision": 0.9509939898289412, |
|
"eval_recall": 0.9554110543427775, |
|
"eval_runtime": 36.8157, |
|
"eval_samples_per_second": 5.405, |
|
"eval_steps_per_second": 1.358, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 15.977808599167822, |
|
"grad_norm": 1.1797449588775635, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.0198, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 15.977808599167822, |
|
"eval_accuracy": 0.9911317795922782, |
|
"eval_f1": 0.952292728114868, |
|
"eval_loss": 0.04465332254767418, |
|
"eval_precision": 0.9496535796766744, |
|
"eval_recall": 0.9549465861588481, |
|
"eval_runtime": 36.3506, |
|
"eval_samples_per_second": 5.474, |
|
"eval_steps_per_second": 1.375, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 16.144244105409154, |
|
"grad_norm": 2.32300066947937, |
|
"learning_rate": 5.150000000000001e-06, |
|
"loss": 0.0167, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 16.144244105409154, |
|
"eval_accuracy": 0.9909154815335532, |
|
"eval_f1": 0.9513663733209818, |
|
"eval_loss": 0.044419851154088974, |
|
"eval_precision": 0.948729792147806, |
|
"eval_recall": 0.9540176497909894, |
|
"eval_runtime": 36.4511, |
|
"eval_samples_per_second": 5.459, |
|
"eval_steps_per_second": 1.372, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 16.310679611650485, |
|
"grad_norm": 1.4079307317733765, |
|
"learning_rate": 5.1e-06, |
|
"loss": 0.0178, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 16.310679611650485, |
|
"eval_accuracy": 0.9891850970637539, |
|
"eval_f1": 0.9448673587081892, |
|
"eval_loss": 0.05134458467364311, |
|
"eval_precision": 0.9385884509624198, |
|
"eval_recall": 0.9512308406874129, |
|
"eval_runtime": 35.9882, |
|
"eval_samples_per_second": 5.53, |
|
"eval_steps_per_second": 1.389, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 16.477115117891817, |
|
"grad_norm": 1.1276496648788452, |
|
"learning_rate": 5.050000000000001e-06, |
|
"loss": 0.024, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 16.477115117891817, |
|
"eval_accuracy": 0.9899421402692911, |
|
"eval_f1": 0.9482678983833718, |
|
"eval_loss": 0.0502447672188282, |
|
"eval_precision": 0.9430408819476344, |
|
"eval_recall": 0.9535531816070599, |
|
"eval_runtime": 36.2001, |
|
"eval_samples_per_second": 5.497, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 16.643550624133148, |
|
"grad_norm": 1.1420115232467651, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0206, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.643550624133148, |
|
"eval_accuracy": 0.9907532579895095, |
|
"eval_f1": 0.9513888888888888, |
|
"eval_loss": 0.045851416885852814, |
|
"eval_precision": 0.9483156437471159, |
|
"eval_recall": 0.9544821179749187, |
|
"eval_runtime": 36.0375, |
|
"eval_samples_per_second": 5.522, |
|
"eval_steps_per_second": 1.387, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.80998613037448, |
|
"grad_norm": 0.6803048849105835, |
|
"learning_rate": 4.95e-06, |
|
"loss": 0.0188, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 16.80998613037448, |
|
"eval_accuracy": 0.9906451089601471, |
|
"eval_f1": 0.9507058551261283, |
|
"eval_loss": 0.04693201929330826, |
|
"eval_precision": 0.9474169741697417, |
|
"eval_recall": 0.9540176497909894, |
|
"eval_runtime": 36.4292, |
|
"eval_samples_per_second": 5.463, |
|
"eval_steps_per_second": 1.373, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 16.97642163661581, |
|
"grad_norm": 0.6494084000587463, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.016, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 16.97642163661581, |
|
"eval_accuracy": 0.9905910344454658, |
|
"eval_f1": 0.9524469067405354, |
|
"eval_loss": 0.04632224142551422, |
|
"eval_precision": 0.9467645709040844, |
|
"eval_recall": 0.9581978634463539, |
|
"eval_runtime": 36.8269, |
|
"eval_samples_per_second": 5.404, |
|
"eval_steps_per_second": 1.358, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 0.9313808083534241, |
|
"learning_rate": 4.85e-06, |
|
"loss": 0.0161, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9555966697502312, |
|
"eval_loss": 0.045460253953933716, |
|
"eval_precision": 0.9516351911561493, |
|
"eval_recall": 0.9595912679981421, |
|
"eval_runtime": 36.5267, |
|
"eval_samples_per_second": 5.448, |
|
"eval_steps_per_second": 1.369, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 17.309292649098474, |
|
"grad_norm": 0.6977990865707397, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.0135, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 17.309292649098474, |
|
"eval_accuracy": 0.9909154815335532, |
|
"eval_f1": 0.9548297428769978, |
|
"eval_loss": 0.04745380952954292, |
|
"eval_precision": 0.9524029574861368, |
|
"eval_recall": 0.9572689270784951, |
|
"eval_runtime": 36.396, |
|
"eval_samples_per_second": 5.468, |
|
"eval_steps_per_second": 1.374, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 17.475728155339805, |
|
"grad_norm": 0.7467624545097351, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.0148, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 17.475728155339805, |
|
"eval_accuracy": 0.9904828854161034, |
|
"eval_f1": 0.9491916859122401, |
|
"eval_loss": 0.047850631177425385, |
|
"eval_precision": 0.9439595774000918, |
|
"eval_recall": 0.9544821179749187, |
|
"eval_runtime": 36.2126, |
|
"eval_samples_per_second": 5.495, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 17.642163661581137, |
|
"grad_norm": 0.7804221510887146, |
|
"learning_rate": 4.7e-06, |
|
"loss": 0.0173, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 17.642163661581137, |
|
"eval_accuracy": 0.9915103011950468, |
|
"eval_f1": 0.9571858366118954, |
|
"eval_loss": 0.04551170393824577, |
|
"eval_precision": 0.9538745387453874, |
|
"eval_recall": 0.9605202043660009, |
|
"eval_runtime": 36.006, |
|
"eval_samples_per_second": 5.527, |
|
"eval_steps_per_second": 1.389, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 17.808599167822468, |
|
"grad_norm": 1.0907295942306519, |
|
"learning_rate": 4.65e-06, |
|
"loss": 0.0173, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 17.808599167822468, |
|
"eval_accuracy": 0.9913480776510031, |
|
"eval_f1": 0.9514338575393155, |
|
"eval_loss": 0.04557771980762482, |
|
"eval_precision": 0.9474896361123906, |
|
"eval_recall": 0.9554110543427775, |
|
"eval_runtime": 36.2064, |
|
"eval_samples_per_second": 5.496, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 17.9750346740638, |
|
"grad_norm": 1.295432209968567, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 0.0185, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 17.9750346740638, |
|
"eval_accuracy": 0.9907532579895095, |
|
"eval_f1": 0.9537465309898243, |
|
"eval_loss": 0.04614636301994324, |
|
"eval_precision": 0.9497927222478121, |
|
"eval_recall": 0.9577333952624245, |
|
"eval_runtime": 36.269, |
|
"eval_samples_per_second": 5.487, |
|
"eval_steps_per_second": 1.379, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 18.14147018030513, |
|
"grad_norm": 1.0728676319122314, |
|
"learning_rate": 4.5500000000000005e-06, |
|
"loss": 0.0153, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 18.14147018030513, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9547553093259464, |
|
"eval_loss": 0.04719853028655052, |
|
"eval_precision": 0.9490592014685636, |
|
"eval_recall": 0.9605202043660009, |
|
"eval_runtime": 36.7621, |
|
"eval_samples_per_second": 5.413, |
|
"eval_steps_per_second": 1.36, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 18.307905686546462, |
|
"grad_norm": 0.848417341709137, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.0148, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.307905686546462, |
|
"eval_accuracy": 0.9913480776510031, |
|
"eval_f1": 0.9546716003700277, |
|
"eval_loss": 0.04460978880524635, |
|
"eval_precision": 0.9507139567019807, |
|
"eval_recall": 0.9586623316302834, |
|
"eval_runtime": 36.6036, |
|
"eval_samples_per_second": 5.437, |
|
"eval_steps_per_second": 1.366, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.474341192787794, |
|
"grad_norm": 0.8914014101028442, |
|
"learning_rate": 4.450000000000001e-06, |
|
"loss": 0.0136, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 18.474341192787794, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9542936288088641, |
|
"eval_loss": 0.044093821197748184, |
|
"eval_precision": 0.9486002753556677, |
|
"eval_recall": 0.9600557361820715, |
|
"eval_runtime": 36.4626, |
|
"eval_samples_per_second": 5.458, |
|
"eval_steps_per_second": 1.371, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 18.640776699029125, |
|
"grad_norm": 1.768336534500122, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.0185, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 18.640776699029125, |
|
"eval_accuracy": 0.9914562266803656, |
|
"eval_f1": 0.9550509731232623, |
|
"eval_loss": 0.047818973660469055, |
|
"eval_precision": 0.9528432732316228, |
|
"eval_recall": 0.9572689270784951, |
|
"eval_runtime": 35.9606, |
|
"eval_samples_per_second": 5.534, |
|
"eval_steps_per_second": 1.39, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 18.807212205270456, |
|
"grad_norm": 0.8891735672950745, |
|
"learning_rate": 4.353333333333334e-06, |
|
"loss": 0.0147, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 18.807212205270456, |
|
"eval_accuracy": 0.9911858541069594, |
|
"eval_f1": 0.9582660825455385, |
|
"eval_loss": 0.04927229881286621, |
|
"eval_precision": 0.9514652014652014, |
|
"eval_recall": 0.965164886205295, |
|
"eval_runtime": 36.0431, |
|
"eval_samples_per_second": 5.521, |
|
"eval_steps_per_second": 1.387, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 18.973647711511788, |
|
"grad_norm": 0.860618531703949, |
|
"learning_rate": 4.303333333333334e-06, |
|
"loss": 0.0156, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 18.973647711511788, |
|
"eval_accuracy": 0.9902665873573785, |
|
"eval_f1": 0.9491916859122401, |
|
"eval_loss": 0.05092372000217438, |
|
"eval_precision": 0.9439595774000918, |
|
"eval_recall": 0.9544821179749187, |
|
"eval_runtime": 36.549, |
|
"eval_samples_per_second": 5.445, |
|
"eval_steps_per_second": 1.368, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 19.14008321775312, |
|
"grad_norm": 0.4298454821109772, |
|
"learning_rate": 4.253333333333334e-06, |
|
"loss": 0.0113, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 19.14008321775312, |
|
"eval_accuracy": 0.9911317795922782, |
|
"eval_f1": 0.9566024599675098, |
|
"eval_loss": 0.046022918075323105, |
|
"eval_precision": 0.9559369202226345, |
|
"eval_recall": 0.9572689270784951, |
|
"eval_runtime": 36.802, |
|
"eval_samples_per_second": 5.407, |
|
"eval_steps_per_second": 1.359, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 19.30651872399445, |
|
"grad_norm": 0.7119155526161194, |
|
"learning_rate": 4.2033333333333335e-06, |
|
"loss": 0.014, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 19.30651872399445, |
|
"eval_accuracy": 0.9904828854161034, |
|
"eval_f1": 0.948220064724919, |
|
"eval_loss": 0.04928451031446457, |
|
"eval_precision": 0.9438564196962724, |
|
"eval_recall": 0.9526242452392011, |
|
"eval_runtime": 36.4604, |
|
"eval_samples_per_second": 5.458, |
|
"eval_steps_per_second": 1.371, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 19.472954230235782, |
|
"grad_norm": 0.6270649433135986, |
|
"learning_rate": 4.153333333333334e-06, |
|
"loss": 0.0147, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 19.472954230235782, |
|
"eval_accuracy": 0.9906451089601471, |
|
"eval_f1": 0.9521608504737693, |
|
"eval_loss": 0.04984944686293602, |
|
"eval_precision": 0.9475620975160993, |
|
"eval_recall": 0.9568044588945657, |
|
"eval_runtime": 36.1824, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 1.382, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 19.639389736477114, |
|
"grad_norm": 0.9536636471748352, |
|
"learning_rate": 4.1033333333333336e-06, |
|
"loss": 0.0126, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 19.639389736477114, |
|
"eval_accuracy": 0.9905910344454658, |
|
"eval_f1": 0.9502199583236861, |
|
"eval_loss": 0.04928808659315109, |
|
"eval_precision": 0.9473684210526315, |
|
"eval_recall": 0.9530887134231305, |
|
"eval_runtime": 36.346, |
|
"eval_samples_per_second": 5.475, |
|
"eval_steps_per_second": 1.376, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 19.805825242718445, |
|
"grad_norm": 2.24277925491333, |
|
"learning_rate": 4.053333333333333e-06, |
|
"loss": 0.0167, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 19.805825242718445, |
|
"eval_accuracy": 0.9903747363867409, |
|
"eval_f1": 0.9519852262234534, |
|
"eval_loss": 0.04912427067756653, |
|
"eval_precision": 0.9463056447911886, |
|
"eval_recall": 0.9577333952624245, |
|
"eval_runtime": 36.3829, |
|
"eval_samples_per_second": 5.47, |
|
"eval_steps_per_second": 1.374, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 19.972260748959776, |
|
"grad_norm": 1.1929985284805298, |
|
"learning_rate": 4.003333333333334e-06, |
|
"loss": 0.0126, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 19.972260748959776, |
|
"eval_accuracy": 0.9907532579895095, |
|
"eval_f1": 0.9515867500579105, |
|
"eval_loss": 0.04741891101002693, |
|
"eval_precision": 0.9491682070240296, |
|
"eval_recall": 0.9540176497909894, |
|
"eval_runtime": 36.3224, |
|
"eval_samples_per_second": 5.479, |
|
"eval_steps_per_second": 1.377, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.13869625520111, |
|
"grad_norm": 0.5980396866798401, |
|
"learning_rate": 3.953333333333333e-06, |
|
"loss": 0.0107, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 20.13869625520111, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9550717924965262, |
|
"eval_loss": 0.04617602005600929, |
|
"eval_precision": 0.9524249422632795, |
|
"eval_recall": 0.9577333952624245, |
|
"eval_runtime": 36.2289, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.38, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 20.305131761442443, |
|
"grad_norm": 0.5774451494216919, |
|
"learning_rate": 3.903333333333334e-06, |
|
"loss": 0.0115, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 20.305131761442443, |
|
"eval_accuracy": 0.9911317795922782, |
|
"eval_f1": 0.9558993304086816, |
|
"eval_loss": 0.048068635165691376, |
|
"eval_precision": 0.9504132231404959, |
|
"eval_recall": 0.9614491407338597, |
|
"eval_runtime": 36.6091, |
|
"eval_samples_per_second": 5.436, |
|
"eval_steps_per_second": 1.366, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 20.471567267683774, |
|
"grad_norm": 0.8061049580574036, |
|
"learning_rate": 3.853333333333334e-06, |
|
"loss": 0.0128, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 20.471567267683774, |
|
"eval_accuracy": 0.9906991834748283, |
|
"eval_f1": 0.951918631530282, |
|
"eval_loss": 0.04859815165400505, |
|
"eval_precision": 0.9475379659456972, |
|
"eval_recall": 0.9563399907106364, |
|
"eval_runtime": 36.2061, |
|
"eval_samples_per_second": 5.496, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 20.638002773925106, |
|
"grad_norm": 0.5735962986946106, |
|
"learning_rate": 3.803333333333334e-06, |
|
"loss": 0.0113, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 20.638002773925106, |
|
"eval_accuracy": 0.9910236305629156, |
|
"eval_f1": 0.9533702677746998, |
|
"eval_loss": 0.04910165071487427, |
|
"eval_precision": 0.947682423129876, |
|
"eval_recall": 0.9591267998142127, |
|
"eval_runtime": 36.2871, |
|
"eval_samples_per_second": 5.484, |
|
"eval_steps_per_second": 1.378, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 20.804438280166437, |
|
"grad_norm": 0.5703373551368713, |
|
"learning_rate": 3.753333333333334e-06, |
|
"loss": 0.0119, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 20.804438280166437, |
|
"eval_accuracy": 0.9901043638133348, |
|
"eval_f1": 0.9498607242339832, |
|
"eval_loss": 0.05141424015164375, |
|
"eval_precision": 0.9494199535962877, |
|
"eval_recall": 0.9503019043195541, |
|
"eval_runtime": 36.2237, |
|
"eval_samples_per_second": 5.494, |
|
"eval_steps_per_second": 1.38, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 20.97087378640777, |
|
"grad_norm": 0.8812251091003418, |
|
"learning_rate": 3.7033333333333336e-06, |
|
"loss": 0.0122, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 20.97087378640777, |
|
"eval_accuracy": 0.9911317795922782, |
|
"eval_f1": 0.9535903948279844, |
|
"eval_loss": 0.04799521341919899, |
|
"eval_precision": 0.94811753902663, |
|
"eval_recall": 0.9591267998142127, |
|
"eval_runtime": 36.3818, |
|
"eval_samples_per_second": 5.47, |
|
"eval_steps_per_second": 1.374, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 21.1373092926491, |
|
"grad_norm": 0.729183554649353, |
|
"learning_rate": 3.6533333333333336e-06, |
|
"loss": 0.0123, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 21.1373092926491, |
|
"eval_accuracy": 0.9909154815335532, |
|
"eval_f1": 0.9522050334795659, |
|
"eval_loss": 0.04769909009337425, |
|
"eval_precision": 0.9467401285583104, |
|
"eval_recall": 0.9577333952624245, |
|
"eval_runtime": 36.5345, |
|
"eval_samples_per_second": 5.447, |
|
"eval_steps_per_second": 1.369, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 21.30374479889043, |
|
"grad_norm": 0.3428969085216522, |
|
"learning_rate": 3.6033333333333337e-06, |
|
"loss": 0.0116, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 21.30374479889043, |
|
"eval_accuracy": 0.9910236305629156, |
|
"eval_f1": 0.9533271719038817, |
|
"eval_loss": 0.04861655458807945, |
|
"eval_precision": 0.9485057471264368, |
|
"eval_recall": 0.9581978634463539, |
|
"eval_runtime": 36.8199, |
|
"eval_samples_per_second": 5.405, |
|
"eval_steps_per_second": 1.358, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 21.470180305131763, |
|
"grad_norm": 0.4823513925075531, |
|
"learning_rate": 3.5533333333333338e-06, |
|
"loss": 0.0108, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 21.470180305131763, |
|
"eval_accuracy": 0.9904828854161034, |
|
"eval_f1": 0.9511295527893039, |
|
"eval_loss": 0.048778366297483444, |
|
"eval_precision": 0.9441647597254005, |
|
"eval_recall": 0.9581978634463539, |
|
"eval_runtime": 36.5655, |
|
"eval_samples_per_second": 5.442, |
|
"eval_steps_per_second": 1.367, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 21.636615811373094, |
|
"grad_norm": 0.3686061203479767, |
|
"learning_rate": 3.5033333333333334e-06, |
|
"loss": 0.0115, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 21.636615811373094, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9542302357836339, |
|
"eval_loss": 0.04717012122273445, |
|
"eval_precision": 0.9498389323515877, |
|
"eval_recall": 0.9586623316302834, |
|
"eval_runtime": 36.5437, |
|
"eval_samples_per_second": 5.446, |
|
"eval_steps_per_second": 1.368, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 21.803051317614425, |
|
"grad_norm": 1.0370802879333496, |
|
"learning_rate": 3.4533333333333334e-06, |
|
"loss": 0.0083, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 21.803051317614425, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.954272517321016, |
|
"eval_loss": 0.04759324714541435, |
|
"eval_precision": 0.9490124023886082, |
|
"eval_recall": 0.9595912679981421, |
|
"eval_runtime": 36.2291, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.38, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 21.969486823855757, |
|
"grad_norm": 1.2627676725387573, |
|
"learning_rate": 3.4033333333333335e-06, |
|
"loss": 0.0094, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 21.969486823855757, |
|
"eval_accuracy": 0.990861407018872, |
|
"eval_f1": 0.9543147208121827, |
|
"eval_loss": 0.047525253146886826, |
|
"eval_precision": 0.948188904172398, |
|
"eval_recall": 0.9605202043660009, |
|
"eval_runtime": 36.2268, |
|
"eval_samples_per_second": 5.493, |
|
"eval_steps_per_second": 1.38, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 22.135922330097088, |
|
"grad_norm": 0.2426026463508606, |
|
"learning_rate": 3.3533333333333336e-06, |
|
"loss": 0.0118, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 22.135922330097088, |
|
"eval_accuracy": 0.9904288109014222, |
|
"eval_f1": 0.9501154734411085, |
|
"eval_loss": 0.049215689301490784, |
|
"eval_precision": 0.9448782728525493, |
|
"eval_recall": 0.9554110543427775, |
|
"eval_runtime": 36.1992, |
|
"eval_samples_per_second": 5.497, |
|
"eval_steps_per_second": 1.381, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 22.30235783633842, |
|
"grad_norm": 0.6006263494491577, |
|
"learning_rate": 3.303333333333333e-06, |
|
"loss": 0.01, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 22.30235783633842, |
|
"eval_accuracy": 0.990861407018872, |
|
"eval_f1": 0.9523148148148148, |
|
"eval_loss": 0.048562802374362946, |
|
"eval_precision": 0.949238578680203, |
|
"eval_recall": 0.9554110543427775, |
|
"eval_runtime": 36.2887, |
|
"eval_samples_per_second": 5.484, |
|
"eval_steps_per_second": 1.378, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 22.46879334257975, |
|
"grad_norm": 0.7383334040641785, |
|
"learning_rate": 3.2533333333333332e-06, |
|
"loss": 0.0114, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 22.46879334257975, |
|
"eval_accuracy": 0.9910236305629156, |
|
"eval_f1": 0.9539671524404348, |
|
"eval_loss": 0.04967198148369789, |
|
"eval_precision": 0.9502304147465438, |
|
"eval_recall": 0.9577333952624245, |
|
"eval_runtime": 36.3824, |
|
"eval_samples_per_second": 5.47, |
|
"eval_steps_per_second": 1.374, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 22.635228848821082, |
|
"grad_norm": 0.5105836987495422, |
|
"learning_rate": 3.2033333333333337e-06, |
|
"loss": 0.0091, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 22.635228848821082, |
|
"eval_accuracy": 0.9909695560482344, |
|
"eval_f1": 0.954209065679926, |
|
"eval_loss": 0.049895454198122025, |
|
"eval_precision": 0.9502533394748963, |
|
"eval_recall": 0.9581978634463539, |
|
"eval_runtime": 36.2966, |
|
"eval_samples_per_second": 5.483, |
|
"eval_steps_per_second": 1.378, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 22.801664355062414, |
|
"grad_norm": 0.8460143804550171, |
|
"learning_rate": 3.1533333333333338e-06, |
|
"loss": 0.0077, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 22.801664355062414, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9563409563409564, |
|
"eval_loss": 0.05023453012108803, |
|
"eval_precision": 0.9512867647058824, |
|
"eval_recall": 0.9614491407338597, |
|
"eval_runtime": 36.5792, |
|
"eval_samples_per_second": 5.44, |
|
"eval_steps_per_second": 1.367, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 22.968099861303745, |
|
"grad_norm": 0.46876421570777893, |
|
"learning_rate": 3.103333333333334e-06, |
|
"loss": 0.01, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 22.968099861303745, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9586127167630057, |
|
"eval_loss": 0.05132585018873215, |
|
"eval_precision": 0.9544198895027625, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 36.5943, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 1.366, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 23.134535367545077, |
|
"grad_norm": 0.26761332154273987, |
|
"learning_rate": 3.053333333333334e-06, |
|
"loss": 0.0087, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 23.134535367545077, |
|
"eval_accuracy": 0.9911858541069594, |
|
"eval_f1": 0.9554375432925422, |
|
"eval_loss": 0.04853161796927452, |
|
"eval_precision": 0.9499540863177227, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 36.2471, |
|
"eval_samples_per_second": 5.49, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 23.300970873786408, |
|
"grad_norm": 0.32841914892196655, |
|
"learning_rate": 3.0033333333333335e-06, |
|
"loss": 0.0073, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 23.300970873786408, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.959278111985192, |
|
"eval_loss": 0.04846283420920372, |
|
"eval_precision": 0.9557399723374828, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 36.0113, |
|
"eval_samples_per_second": 5.526, |
|
"eval_steps_per_second": 1.388, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 23.46740638002774, |
|
"grad_norm": 0.3114074766635895, |
|
"learning_rate": 2.9533333333333336e-06, |
|
"loss": 0.0083, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 23.46740638002774, |
|
"eval_accuracy": 0.9913480776510031, |
|
"eval_f1": 0.957205644228545, |
|
"eval_loss": 0.04847896471619606, |
|
"eval_precision": 0.9534562211981567, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 36.2766, |
|
"eval_samples_per_second": 5.486, |
|
"eval_steps_per_second": 1.378, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 23.63384188626907, |
|
"grad_norm": 0.815006673336029, |
|
"learning_rate": 2.9033333333333336e-06, |
|
"loss": 0.0117, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 23.63384188626907, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9590372598935432, |
|
"eval_loss": 0.04786692187190056, |
|
"eval_precision": 0.955719557195572, |
|
"eval_recall": 0.9623780771017185, |
|
"eval_runtime": 36.5057, |
|
"eval_samples_per_second": 5.451, |
|
"eval_steps_per_second": 1.37, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 23.800277392510402, |
|
"grad_norm": 0.34551236033439636, |
|
"learning_rate": 2.8533333333333337e-06, |
|
"loss": 0.0095, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 23.800277392510402, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9542302357836339, |
|
"eval_loss": 0.05084284767508507, |
|
"eval_precision": 0.9498389323515877, |
|
"eval_recall": 0.9586623316302834, |
|
"eval_runtime": 36.5022, |
|
"eval_samples_per_second": 5.452, |
|
"eval_steps_per_second": 1.37, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 23.966712898751734, |
|
"grad_norm": 0.988761305809021, |
|
"learning_rate": 2.8033333333333333e-06, |
|
"loss": 0.009, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 23.966712898751734, |
|
"eval_accuracy": 0.9909695560482344, |
|
"eval_f1": 0.9559603412497119, |
|
"eval_loss": 0.051338665187358856, |
|
"eval_precision": 0.9491758241758241, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 36.4961, |
|
"eval_samples_per_second": 5.453, |
|
"eval_steps_per_second": 1.37, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 24.133148404993065, |
|
"grad_norm": 0.20439928770065308, |
|
"learning_rate": 2.7533333333333334e-06, |
|
"loss": 0.0077, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 24.133148404993065, |
|
"eval_accuracy": 0.9915103011950468, |
|
"eval_f1": 0.9590562109646079, |
|
"eval_loss": 0.050405893474817276, |
|
"eval_precision": 0.9552995391705069, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 36.5792, |
|
"eval_samples_per_second": 5.44, |
|
"eval_steps_per_second": 1.367, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 24.299583911234397, |
|
"grad_norm": 0.6065575480461121, |
|
"learning_rate": 2.7033333333333334e-06, |
|
"loss": 0.0087, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 24.299583911234397, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9565418400369857, |
|
"eval_loss": 0.04999900609254837, |
|
"eval_precision": 0.9521398987574782, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 36.2889, |
|
"eval_samples_per_second": 5.484, |
|
"eval_steps_per_second": 1.378, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 24.466019417475728, |
|
"grad_norm": 0.4505390226840973, |
|
"learning_rate": 2.6533333333333335e-06, |
|
"loss": 0.0068, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 24.466019417475728, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9574271170754282, |
|
"eval_loss": 0.05055619403719902, |
|
"eval_precision": 0.9538958045182112, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.8763, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 24.63245492371706, |
|
"grad_norm": 0.2784092128276825, |
|
"learning_rate": 2.603333333333334e-06, |
|
"loss": 0.0094, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 24.63245492371706, |
|
"eval_accuracy": 0.9913480776510031, |
|
"eval_f1": 0.9549132947976879, |
|
"eval_loss": 0.050024211406707764, |
|
"eval_precision": 0.9507366482504604, |
|
"eval_recall": 0.9591267998142127, |
|
"eval_runtime": 36.3659, |
|
"eval_samples_per_second": 5.472, |
|
"eval_steps_per_second": 1.375, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 24.79889042995839, |
|
"grad_norm": 0.24667127430438995, |
|
"learning_rate": 2.5533333333333336e-06, |
|
"loss": 0.0088, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 24.79889042995839, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9551548774849746, |
|
"eval_loss": 0.048643559217453, |
|
"eval_precision": 0.9507593189139438, |
|
"eval_recall": 0.9595912679981421, |
|
"eval_runtime": 36.4912, |
|
"eval_samples_per_second": 5.453, |
|
"eval_steps_per_second": 1.37, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 24.965325936199722, |
|
"grad_norm": 0.10884588211774826, |
|
"learning_rate": 2.5033333333333336e-06, |
|
"loss": 0.0089, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 24.965325936199722, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9558789558789559, |
|
"eval_loss": 0.05070747807621956, |
|
"eval_precision": 0.9508272058823529, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 36.2816, |
|
"eval_samples_per_second": 5.485, |
|
"eval_steps_per_second": 1.378, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 25.131761442441054, |
|
"grad_norm": 0.6150490641593933, |
|
"learning_rate": 2.4533333333333333e-06, |
|
"loss": 0.0063, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 25.131761442441054, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9585360203845263, |
|
"eval_loss": 0.04789712280035019, |
|
"eval_precision": 0.9560998151571165, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 36.2469, |
|
"eval_samples_per_second": 5.49, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 25.298196948682385, |
|
"grad_norm": 0.7432591319084167, |
|
"learning_rate": 2.4033333333333338e-06, |
|
"loss": 0.0058, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 25.298196948682385, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9572452045296973, |
|
"eval_loss": 0.050580546259880066, |
|
"eval_precision": 0.952621895124195, |
|
"eval_recall": 0.9619136089177891, |
|
"eval_runtime": 36.4588, |
|
"eval_samples_per_second": 5.458, |
|
"eval_steps_per_second": 1.371, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 25.464632454923716, |
|
"grad_norm": 0.742586612701416, |
|
"learning_rate": 2.3533333333333334e-06, |
|
"loss": 0.0102, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 25.464632454923716, |
|
"eval_accuracy": 0.9912399286216407, |
|
"eval_f1": 0.9574861367837338, |
|
"eval_loss": 0.04992222413420677, |
|
"eval_precision": 0.9526436781609195, |
|
"eval_recall": 0.9623780771017185, |
|
"eval_runtime": 36.5208, |
|
"eval_samples_per_second": 5.449, |
|
"eval_steps_per_second": 1.369, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 25.631067961165048, |
|
"grad_norm": 0.9237321019172668, |
|
"learning_rate": 2.3033333333333334e-06, |
|
"loss": 0.0079, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 25.631067961165048, |
|
"eval_accuracy": 0.9905369599307846, |
|
"eval_f1": 0.9541368979027426, |
|
"eval_loss": 0.05427027493715286, |
|
"eval_precision": 0.9469350411710887, |
|
"eval_recall": 0.9614491407338597, |
|
"eval_runtime": 36.0226, |
|
"eval_samples_per_second": 5.524, |
|
"eval_steps_per_second": 1.388, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 25.79750346740638, |
|
"grad_norm": 0.2974264621734619, |
|
"learning_rate": 2.2533333333333335e-06, |
|
"loss": 0.009, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 25.79750346740638, |
|
"eval_accuracy": 0.9914562266803656, |
|
"eval_f1": 0.9572452045296973, |
|
"eval_loss": 0.049834854900836945, |
|
"eval_precision": 0.952621895124195, |
|
"eval_recall": 0.9619136089177891, |
|
"eval_runtime": 36.6625, |
|
"eval_samples_per_second": 5.428, |
|
"eval_steps_per_second": 1.364, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 25.96393897364771, |
|
"grad_norm": 0.6791291236877441, |
|
"learning_rate": 2.2033333333333336e-06, |
|
"loss": 0.0068, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 25.96393897364771, |
|
"eval_accuracy": 0.991077705077597, |
|
"eval_f1": 0.9563611175248211, |
|
"eval_loss": 0.05109778791666031, |
|
"eval_precision": 0.950872359963269, |
|
"eval_recall": 0.9619136089177891, |
|
"eval_runtime": 35.8655, |
|
"eval_samples_per_second": 5.549, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 26.130374479889042, |
|
"grad_norm": 0.5723872184753418, |
|
"learning_rate": 2.153333333333333e-06, |
|
"loss": 0.007, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 26.130374479889042, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9579676674364895, |
|
"eval_loss": 0.049178168177604675, |
|
"eval_precision": 0.9526871841984382, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.9503, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 1.391, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 26.296809986130373, |
|
"grad_norm": 0.3830583393573761, |
|
"learning_rate": 2.1033333333333337e-06, |
|
"loss": 0.0086, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 26.296809986130373, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9554375432925422, |
|
"eval_loss": 0.05156167596578598, |
|
"eval_precision": 0.9499540863177227, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.8417, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 1.395, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 26.463245492371705, |
|
"grad_norm": 0.14329634606838226, |
|
"learning_rate": 2.0533333333333337e-06, |
|
"loss": 0.0078, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 26.463245492371705, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9556581986143187, |
|
"eval_loss": 0.05027909576892853, |
|
"eval_precision": 0.9503904455672945, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.8472, |
|
"eval_samples_per_second": 5.551, |
|
"eval_steps_per_second": 1.395, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 26.629680998613036, |
|
"grad_norm": 0.17582757771015167, |
|
"learning_rate": 2.0033333333333334e-06, |
|
"loss": 0.0067, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 26.629680998613036, |
|
"eval_accuracy": 0.9915103011950468, |
|
"eval_f1": 0.9577269577269578, |
|
"eval_loss": 0.05140436813235283, |
|
"eval_precision": 0.9526654411764706, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 35.8691, |
|
"eval_samples_per_second": 5.548, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 26.796116504854368, |
|
"grad_norm": 0.6374102830886841, |
|
"learning_rate": 1.9533333333333334e-06, |
|
"loss": 0.0059, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 26.796116504854368, |
|
"eval_accuracy": 0.9918888227978154, |
|
"eval_f1": 0.9588344125809436, |
|
"eval_loss": 0.05035752058029175, |
|
"eval_precision": 0.9548595117457392, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 35.7794, |
|
"eval_samples_per_second": 5.562, |
|
"eval_steps_per_second": 1.397, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 26.9625520110957, |
|
"grad_norm": 0.5752395987510681, |
|
"learning_rate": 1.9033333333333335e-06, |
|
"loss": 0.0089, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 26.9625520110957, |
|
"eval_accuracy": 0.9916184502244092, |
|
"eval_f1": 0.9560795191863154, |
|
"eval_loss": 0.051971472799777985, |
|
"eval_precision": 0.9516797054763001, |
|
"eval_recall": 0.9605202043660009, |
|
"eval_runtime": 36.1279, |
|
"eval_samples_per_second": 5.508, |
|
"eval_steps_per_second": 1.384, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 27.12898751733703, |
|
"grad_norm": 0.40148672461509705, |
|
"learning_rate": 1.8533333333333333e-06, |
|
"loss": 0.0059, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 27.12898751733703, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9572649572649573, |
|
"eval_loss": 0.05115849897265434, |
|
"eval_precision": 0.9522058823529411, |
|
"eval_recall": 0.9623780771017185, |
|
"eval_runtime": 36.2324, |
|
"eval_samples_per_second": 5.492, |
|
"eval_steps_per_second": 1.38, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 27.295423023578362, |
|
"grad_norm": 0.19672174751758575, |
|
"learning_rate": 1.8033333333333336e-06, |
|
"loss": 0.0073, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 27.295423023578362, |
|
"eval_accuracy": 0.9916184502244092, |
|
"eval_f1": 0.9569842738205366, |
|
"eval_loss": 0.05259764939546585, |
|
"eval_precision": 0.9530170428374021, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 36.3768, |
|
"eval_samples_per_second": 5.471, |
|
"eval_steps_per_second": 1.375, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 27.461858529819693, |
|
"grad_norm": 1.178671956062317, |
|
"learning_rate": 1.7533333333333336e-06, |
|
"loss": 0.0065, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 27.461858529819693, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9577269577269578, |
|
"eval_loss": 0.052951879799366, |
|
"eval_precision": 0.9526654411764706, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 36.7573, |
|
"eval_samples_per_second": 5.414, |
|
"eval_steps_per_second": 1.36, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 27.628294036061025, |
|
"grad_norm": 0.8156425356864929, |
|
"learning_rate": 1.7033333333333335e-06, |
|
"loss": 0.0064, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 27.628294036061025, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.957205644228545, |
|
"eval_loss": 0.05146779865026474, |
|
"eval_precision": 0.9534562211981567, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.9308, |
|
"eval_samples_per_second": 5.538, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 27.794729542302356, |
|
"grad_norm": 0.4098323881626129, |
|
"learning_rate": 1.6533333333333335e-06, |
|
"loss": 0.0072, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 27.794729542302356, |
|
"eval_accuracy": 0.9906991834748283, |
|
"eval_f1": 0.9545559400230681, |
|
"eval_loss": 0.054223690181970596, |
|
"eval_precision": 0.9482126489459212, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.9196, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 27.96116504854369, |
|
"grad_norm": 0.5159748792648315, |
|
"learning_rate": 1.6033333333333334e-06, |
|
"loss": 0.0066, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 27.96116504854369, |
|
"eval_accuracy": 0.990861407018872, |
|
"eval_f1": 0.9549965381952458, |
|
"eval_loss": 0.05374361574649811, |
|
"eval_precision": 0.9490825688073394, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.7031, |
|
"eval_samples_per_second": 5.574, |
|
"eval_steps_per_second": 1.4, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 28.127600554785023, |
|
"grad_norm": 0.499012291431427, |
|
"learning_rate": 1.5533333333333334e-06, |
|
"loss": 0.006, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 28.127600554785023, |
|
"eval_accuracy": 0.9915103011950468, |
|
"eval_f1": 0.9579482439926063, |
|
"eval_loss": 0.05182594433426857, |
|
"eval_precision": 0.953103448275862, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 35.8174, |
|
"eval_samples_per_second": 5.556, |
|
"eval_steps_per_second": 1.396, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 28.294036061026354, |
|
"grad_norm": 0.5842483639717102, |
|
"learning_rate": 1.5033333333333337e-06, |
|
"loss": 0.0074, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 28.294036061026354, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9565418400369857, |
|
"eval_loss": 0.05230095610022545, |
|
"eval_precision": 0.9521398987574782, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.928, |
|
"eval_samples_per_second": 5.539, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 28.460471567267685, |
|
"grad_norm": 0.4897175431251526, |
|
"learning_rate": 1.4533333333333335e-06, |
|
"loss": 0.0068, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 28.460471567267685, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.955458112162474, |
|
"eval_loss": 0.05341142788529396, |
|
"eval_precision": 0.9495412844036697, |
|
"eval_recall": 0.9614491407338597, |
|
"eval_runtime": 36.0278, |
|
"eval_samples_per_second": 5.524, |
|
"eval_steps_per_second": 1.388, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 28.626907073509017, |
|
"grad_norm": 0.4191240668296814, |
|
"learning_rate": 1.4033333333333336e-06, |
|
"loss": 0.0055, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 28.626907073509017, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9583526145303101, |
|
"eval_loss": 0.05210199952125549, |
|
"eval_precision": 0.954817888427847, |
|
"eval_recall": 0.9619136089177891, |
|
"eval_runtime": 36.2636, |
|
"eval_samples_per_second": 5.488, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 28.793342579750348, |
|
"grad_norm": 0.6655350923538208, |
|
"learning_rate": 1.3533333333333334e-06, |
|
"loss": 0.0056, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 28.793342579750348, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9567829905246129, |
|
"eval_loss": 0.05259960889816284, |
|
"eval_precision": 0.952161913523459, |
|
"eval_recall": 0.9614491407338597, |
|
"eval_runtime": 36.0456, |
|
"eval_samples_per_second": 5.521, |
|
"eval_steps_per_second": 1.387, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 28.95977808599168, |
|
"grad_norm": 0.9510291814804077, |
|
"learning_rate": 1.3033333333333335e-06, |
|
"loss": 0.0066, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 28.95977808599168, |
|
"eval_accuracy": 0.9913480776510031, |
|
"eval_f1": 0.9570240295748613, |
|
"eval_loss": 0.05272991955280304, |
|
"eval_precision": 0.952183908045977, |
|
"eval_recall": 0.9619136089177891, |
|
"eval_runtime": 36.3753, |
|
"eval_samples_per_second": 5.471, |
|
"eval_steps_per_second": 1.375, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 29.12621359223301, |
|
"grad_norm": 0.33463072776794434, |
|
"learning_rate": 1.2533333333333333e-06, |
|
"loss": 0.0053, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 29.12621359223301, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9579482439926063, |
|
"eval_loss": 0.0533275306224823, |
|
"eval_precision": 0.953103448275862, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 35.8945, |
|
"eval_samples_per_second": 5.544, |
|
"eval_steps_per_second": 1.393, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 29.292649098474342, |
|
"grad_norm": 0.2936910092830658, |
|
"learning_rate": 1.2033333333333334e-06, |
|
"loss": 0.0063, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 29.292649098474342, |
|
"eval_accuracy": 0.9912940031363219, |
|
"eval_f1": 0.9569842738205366, |
|
"eval_loss": 0.05200694501399994, |
|
"eval_precision": 0.9530170428374021, |
|
"eval_recall": 0.9609846725499304, |
|
"eval_runtime": 35.7745, |
|
"eval_samples_per_second": 5.563, |
|
"eval_steps_per_second": 1.398, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 29.459084604715674, |
|
"grad_norm": 0.45608168840408325, |
|
"learning_rate": 1.1533333333333334e-06, |
|
"loss": 0.0059, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 29.459084604715674, |
|
"eval_accuracy": 0.9910236305629156, |
|
"eval_f1": 0.9554169554169554, |
|
"eval_loss": 0.0532723143696785, |
|
"eval_precision": 0.9503676470588235, |
|
"eval_recall": 0.9605202043660009, |
|
"eval_runtime": 35.9196, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 29.625520110957005, |
|
"grad_norm": 0.46974512934684753, |
|
"learning_rate": 1.1033333333333335e-06, |
|
"loss": 0.0059, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 29.625520110957005, |
|
"eval_accuracy": 0.9911858541069594, |
|
"eval_f1": 0.9572452045296973, |
|
"eval_loss": 0.05324824899435043, |
|
"eval_precision": 0.952621895124195, |
|
"eval_recall": 0.9619136089177891, |
|
"eval_runtime": 36.0296, |
|
"eval_samples_per_second": 5.523, |
|
"eval_steps_per_second": 1.388, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 29.791955617198337, |
|
"grad_norm": 0.6280196309089661, |
|
"learning_rate": 1.0533333333333333e-06, |
|
"loss": 0.0062, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 29.791955617198337, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9579288025889968, |
|
"eval_loss": 0.05163406580686569, |
|
"eval_precision": 0.9535204786010124, |
|
"eval_recall": 0.9623780771017185, |
|
"eval_runtime": 35.8797, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 29.958391123439668, |
|
"grad_norm": 0.3609830439090729, |
|
"learning_rate": 1.0033333333333334e-06, |
|
"loss": 0.0064, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 29.958391123439668, |
|
"eval_accuracy": 0.9914562266803656, |
|
"eval_f1": 0.9572649572649573, |
|
"eval_loss": 0.05152719095349312, |
|
"eval_precision": 0.9522058823529411, |
|
"eval_recall": 0.9623780771017185, |
|
"eval_runtime": 36.0059, |
|
"eval_samples_per_second": 5.527, |
|
"eval_steps_per_second": 1.389, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 30.124826629681, |
|
"grad_norm": 0.37590721249580383, |
|
"learning_rate": 9.533333333333335e-07, |
|
"loss": 0.0055, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 30.124826629681, |
|
"eval_accuracy": 0.9917265992537717, |
|
"eval_f1": 0.9590751445086704, |
|
"eval_loss": 0.05128318816423416, |
|
"eval_precision": 0.9548802946593001, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 36.0097, |
|
"eval_samples_per_second": 5.526, |
|
"eval_steps_per_second": 1.389, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 30.29126213592233, |
|
"grad_norm": 0.4574069678783417, |
|
"learning_rate": 9.033333333333334e-07, |
|
"loss": 0.0064, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 30.29126213592233, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9583911234396673, |
|
"eval_loss": 0.052385713905096054, |
|
"eval_precision": 0.9539806718821905, |
|
"eval_recall": 0.9628425452856479, |
|
"eval_runtime": 35.8265, |
|
"eval_samples_per_second": 5.555, |
|
"eval_steps_per_second": 1.396, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 30.457697642163662, |
|
"grad_norm": 1.509279489517212, |
|
"learning_rate": 8.533333333333334e-07, |
|
"loss": 0.0055, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 30.457697642163662, |
|
"eval_accuracy": 0.9915103011950468, |
|
"eval_f1": 0.9581889581889582, |
|
"eval_loss": 0.05304015427827835, |
|
"eval_precision": 0.953125, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.8068, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 1.396, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 30.624133148404994, |
|
"grad_norm": 0.08701591938734055, |
|
"learning_rate": 8.033333333333335e-07, |
|
"loss": 0.0065, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 30.624133148404994, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9588914549653579, |
|
"eval_loss": 0.05279012396931648, |
|
"eval_precision": 0.9536058796508957, |
|
"eval_recall": 0.9642359498374361, |
|
"eval_runtime": 36.0763, |
|
"eval_samples_per_second": 5.516, |
|
"eval_steps_per_second": 1.386, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 30.790568654646325, |
|
"grad_norm": 0.39128488302230835, |
|
"learning_rate": 7.533333333333335e-07, |
|
"loss": 0.0068, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 30.790568654646325, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9575253924284395, |
|
"eval_loss": 0.05296061187982559, |
|
"eval_precision": 0.9518127581459385, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.9916, |
|
"eval_samples_per_second": 5.529, |
|
"eval_steps_per_second": 1.389, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 30.957004160887656, |
|
"grad_norm": 0.20628976821899414, |
|
"learning_rate": 7.033333333333334e-07, |
|
"loss": 0.0047, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 30.957004160887656, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.958910433979686, |
|
"eval_loss": 0.05448687821626663, |
|
"eval_precision": 0.953189536484626, |
|
"eval_recall": 0.9647004180213655, |
|
"eval_runtime": 35.9295, |
|
"eval_samples_per_second": 5.539, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 31.123439667128988, |
|
"grad_norm": 0.3910321295261383, |
|
"learning_rate": 6.533333333333334e-07, |
|
"loss": 0.0051, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 31.123439667128988, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9595749595749595, |
|
"eval_loss": 0.05336242541670799, |
|
"eval_precision": 0.9545036764705882, |
|
"eval_recall": 0.9647004180213655, |
|
"eval_runtime": 36.0288, |
|
"eval_samples_per_second": 5.523, |
|
"eval_steps_per_second": 1.388, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 31.28987517337032, |
|
"grad_norm": 0.2049601525068283, |
|
"learning_rate": 6.033333333333334e-07, |
|
"loss": 0.0044, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 31.28987517337032, |
|
"eval_accuracy": 0.9914021521656843, |
|
"eval_f1": 0.9581889581889582, |
|
"eval_loss": 0.053161416202783585, |
|
"eval_precision": 0.953125, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.9772, |
|
"eval_samples_per_second": 5.531, |
|
"eval_steps_per_second": 1.39, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 31.45631067961165, |
|
"grad_norm": 0.4429149329662323, |
|
"learning_rate": 5.533333333333334e-07, |
|
"loss": 0.0068, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 31.45631067961165, |
|
"eval_accuracy": 0.9913480776510031, |
|
"eval_f1": 0.9579676674364895, |
|
"eval_loss": 0.05317556858062744, |
|
"eval_precision": 0.9526871841984382, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.6808, |
|
"eval_samples_per_second": 5.577, |
|
"eval_steps_per_second": 1.401, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 31.622746185852982, |
|
"grad_norm": 0.4102032482624054, |
|
"learning_rate": 5.033333333333334e-07, |
|
"loss": 0.0045, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 31.622746185852982, |
|
"eval_accuracy": 0.9915103011950468, |
|
"eval_f1": 0.9590940605500345, |
|
"eval_loss": 0.053103264421224594, |
|
"eval_precision": 0.9544618215271389, |
|
"eval_recall": 0.9637714816535068, |
|
"eval_runtime": 35.706, |
|
"eval_samples_per_second": 5.573, |
|
"eval_steps_per_second": 1.4, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 31.789181692094314, |
|
"grad_norm": 0.8468719720840454, |
|
"learning_rate": 4.533333333333334e-07, |
|
"loss": 0.0047, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 31.789181692094314, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9586318465449504, |
|
"eval_loss": 0.05298003926873207, |
|
"eval_precision": 0.954001839926403, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.8749, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 31.955617198335645, |
|
"grad_norm": 0.2063705176115036, |
|
"learning_rate": 4.0333333333333337e-07, |
|
"loss": 0.0075, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 31.955617198335645, |
|
"eval_accuracy": 0.9916184502244092, |
|
"eval_f1": 0.9593157651410079, |
|
"eval_loss": 0.05329431965947151, |
|
"eval_precision": 0.9549010584445468, |
|
"eval_recall": 0.9637714816535068, |
|
"eval_runtime": 36.0809, |
|
"eval_samples_per_second": 5.515, |
|
"eval_steps_per_second": 1.386, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 32.12205270457698, |
|
"grad_norm": 0.3478763997554779, |
|
"learning_rate": 3.533333333333334e-07, |
|
"loss": 0.0055, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 32.12205270457698, |
|
"eval_accuracy": 0.9917265992537717, |
|
"eval_f1": 0.9595375722543353, |
|
"eval_loss": 0.05245138704776764, |
|
"eval_precision": 0.9553406998158379, |
|
"eval_recall": 0.9637714816535068, |
|
"eval_runtime": 35.9673, |
|
"eval_samples_per_second": 5.533, |
|
"eval_steps_per_second": 1.39, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 32.28848821081831, |
|
"grad_norm": 0.721191942691803, |
|
"learning_rate": 3.033333333333334e-07, |
|
"loss": 0.006, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 32.28848821081831, |
|
"eval_accuracy": 0.9917265992537717, |
|
"eval_f1": 0.9595375722543353, |
|
"eval_loss": 0.05226488783955574, |
|
"eval_precision": 0.9553406998158379, |
|
"eval_recall": 0.9637714816535068, |
|
"eval_runtime": 35.9385, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 1.391, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 32.45492371705964, |
|
"grad_norm": 0.3022706210613251, |
|
"learning_rate": 2.533333333333333e-07, |
|
"loss": 0.0062, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 32.45492371705964, |
|
"eval_accuracy": 0.9916725247390905, |
|
"eval_f1": 0.9588534442903375, |
|
"eval_loss": 0.05245348811149597, |
|
"eval_precision": 0.9544408651633686, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.8766, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 32.62135922330097, |
|
"grad_norm": 0.4700392186641693, |
|
"learning_rate": 2.0333333333333333e-07, |
|
"loss": 0.0059, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 32.62135922330097, |
|
"eval_accuracy": 0.9917265992537717, |
|
"eval_f1": 0.9593157651410079, |
|
"eval_loss": 0.05246575176715851, |
|
"eval_precision": 0.9549010584445468, |
|
"eval_recall": 0.9637714816535068, |
|
"eval_runtime": 35.8779, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 1.394, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 32.787794729542306, |
|
"grad_norm": 0.7413909435272217, |
|
"learning_rate": 1.5333333333333333e-07, |
|
"loss": 0.0058, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 32.787794729542306, |
|
"eval_accuracy": 0.9917265992537717, |
|
"eval_f1": 0.959556274555119, |
|
"eval_loss": 0.053051915019750595, |
|
"eval_precision": 0.9549218031278749, |
|
"eval_recall": 0.9642359498374361, |
|
"eval_runtime": 35.8838, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.393, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 32.95423023578363, |
|
"grad_norm": 0.7399964332580566, |
|
"learning_rate": 1.0333333333333335e-07, |
|
"loss": 0.005, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 32.95423023578363, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9584103512014789, |
|
"eval_loss": 0.05329006537795067, |
|
"eval_precision": 0.9535632183908046, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.9193, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 33.12066574202497, |
|
"grad_norm": 0.3113914728164673, |
|
"learning_rate": 5.3333333333333334e-08, |
|
"loss": 0.007, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 33.12066574202497, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9584103512014789, |
|
"eval_loss": 0.05327802523970604, |
|
"eval_precision": 0.9535632183908046, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.914, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 1.392, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 33.287101248266296, |
|
"grad_norm": 0.33092889189720154, |
|
"learning_rate": 3.3333333333333334e-09, |
|
"loss": 0.0047, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 33.287101248266296, |
|
"eval_accuracy": 0.991564375709728, |
|
"eval_f1": 0.9584103512014789, |
|
"eval_loss": 0.05324762314558029, |
|
"eval_precision": 0.9535632183908046, |
|
"eval_recall": 0.9633070134695774, |
|
"eval_runtime": 35.8608, |
|
"eval_samples_per_second": 5.549, |
|
"eval_steps_per_second": 1.394, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 33.287101248266296, |
|
"step": 3000, |
|
"total_flos": 8.9780255686656e+16, |
|
"train_loss": 0.0764370101193587, |
|
"train_runtime": 55223.9534, |
|
"train_samples_per_second": 1.738, |
|
"train_steps_per_second": 0.054 |
|
} |
|
], |
|
"logging_steps": 15, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 34, |
|
"save_steps": 15, |
|
"total_flos": 8.9780255686656e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |