{ |
|
"best_metric": 1.1938023567199707, |
|
"best_model_checkpoint": "autotrain-v2v7o-9tu3d/checkpoint-1340", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 1340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018656716417910446, |
|
"grad_norm": 3.044177532196045, |
|
"learning_rate": 4.6641791044776116e-07, |
|
"loss": 3.5829, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03731343283582089, |
|
"grad_norm": 3.1452090740203857, |
|
"learning_rate": 9.328358208955223e-07, |
|
"loss": 3.5707, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.055970149253731345, |
|
"grad_norm": 3.5051560401916504, |
|
"learning_rate": 1.3992537313432837e-06, |
|
"loss": 3.5587, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 3.155848741531372, |
|
"learning_rate": 1.8656716417910446e-06, |
|
"loss": 3.5653, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09328358208955224, |
|
"grad_norm": 3.025758743286133, |
|
"learning_rate": 2.3320895522388064e-06, |
|
"loss": 3.5639, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11194029850746269, |
|
"grad_norm": 2.966825485229492, |
|
"learning_rate": 2.7985074626865674e-06, |
|
"loss": 3.5187, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13059701492537312, |
|
"grad_norm": 2.870944023132324, |
|
"learning_rate": 3.2649253731343283e-06, |
|
"loss": 3.5382, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 3.2777228355407715, |
|
"learning_rate": 3.7313432835820893e-06, |
|
"loss": 3.5311, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16791044776119404, |
|
"grad_norm": 2.8916268348693848, |
|
"learning_rate": 4.1977611940298515e-06, |
|
"loss": 3.5348, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1865671641791045, |
|
"grad_norm": 2.667680263519287, |
|
"learning_rate": 4.664179104477613e-06, |
|
"loss": 3.5209, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.20522388059701493, |
|
"grad_norm": 2.6136128902435303, |
|
"learning_rate": 5.130597014925373e-06, |
|
"loss": 3.5293, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 3.2311267852783203, |
|
"learning_rate": 5.597014925373135e-06, |
|
"loss": 3.5536, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.24253731343283583, |
|
"grad_norm": 2.513153076171875, |
|
"learning_rate": 6.063432835820896e-06, |
|
"loss": 3.5286, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.26119402985074625, |
|
"grad_norm": 2.53420090675354, |
|
"learning_rate": 6.529850746268657e-06, |
|
"loss": 3.5305, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2798507462686567, |
|
"grad_norm": 2.7235593795776367, |
|
"learning_rate": 6.996268656716418e-06, |
|
"loss": 3.5244, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 2.621687889099121, |
|
"learning_rate": 7.4626865671641785e-06, |
|
"loss": 3.4981, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.31716417910447764, |
|
"grad_norm": 2.5116870403289795, |
|
"learning_rate": 7.92910447761194e-06, |
|
"loss": 3.5208, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3358208955223881, |
|
"grad_norm": 2.6995580196380615, |
|
"learning_rate": 8.395522388059703e-06, |
|
"loss": 3.5069, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35447761194029853, |
|
"grad_norm": 2.7225232124328613, |
|
"learning_rate": 8.861940298507463e-06, |
|
"loss": 3.517, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 2.9143073558807373, |
|
"learning_rate": 9.328358208955226e-06, |
|
"loss": 3.4939, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3917910447761194, |
|
"grad_norm": 2.6115267276763916, |
|
"learning_rate": 9.794776119402986e-06, |
|
"loss": 3.4971, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.41044776119402987, |
|
"grad_norm": 2.9028003215789795, |
|
"learning_rate": 1.0261194029850747e-05, |
|
"loss": 3.5239, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4291044776119403, |
|
"grad_norm": 2.7557551860809326, |
|
"learning_rate": 1.0727611940298509e-05, |
|
"loss": 3.4567, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 3.2114148139953613, |
|
"learning_rate": 1.119402985074627e-05, |
|
"loss": 3.5006, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4664179104477612, |
|
"grad_norm": 3.3313052654266357, |
|
"learning_rate": 1.166044776119403e-05, |
|
"loss": 3.4342, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.48507462686567165, |
|
"grad_norm": 3.8037962913513184, |
|
"learning_rate": 1.2126865671641792e-05, |
|
"loss": 3.5062, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.503731343283582, |
|
"grad_norm": 3.6135289669036865, |
|
"learning_rate": 1.2593283582089551e-05, |
|
"loss": 3.4769, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 3.9982452392578125, |
|
"learning_rate": 1.3059701492537313e-05, |
|
"loss": 3.5184, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5410447761194029, |
|
"grad_norm": 4.104573726654053, |
|
"learning_rate": 1.3526119402985074e-05, |
|
"loss": 3.4843, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5597014925373134, |
|
"grad_norm": 4.863430976867676, |
|
"learning_rate": 1.3992537313432836e-05, |
|
"loss": 3.4106, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5783582089552238, |
|
"grad_norm": 4.079490661621094, |
|
"learning_rate": 1.4458955223880596e-05, |
|
"loss": 3.4743, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 3.870924949645996, |
|
"learning_rate": 1.4925373134328357e-05, |
|
"loss": 3.3934, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6156716417910447, |
|
"grad_norm": 3.8417205810546875, |
|
"learning_rate": 1.539179104477612e-05, |
|
"loss": 3.4258, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.6343283582089553, |
|
"grad_norm": 4.541757583618164, |
|
"learning_rate": 1.585820895522388e-05, |
|
"loss": 3.4659, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.6529850746268657, |
|
"grad_norm": 4.8343424797058105, |
|
"learning_rate": 1.6324626865671644e-05, |
|
"loss": 3.4103, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 5.048968315124512, |
|
"learning_rate": 1.6791044776119406e-05, |
|
"loss": 3.459, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6902985074626866, |
|
"grad_norm": 4.90827751159668, |
|
"learning_rate": 1.7257462686567165e-05, |
|
"loss": 3.4152, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.7089552238805971, |
|
"grad_norm": 4.624119758605957, |
|
"learning_rate": 1.7723880597014927e-05, |
|
"loss": 3.4129, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.7276119402985075, |
|
"grad_norm": 4.981058120727539, |
|
"learning_rate": 1.819029850746269e-05, |
|
"loss": 3.4204, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 4.767482280731201, |
|
"learning_rate": 1.865671641791045e-05, |
|
"loss": 3.3169, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7649253731343284, |
|
"grad_norm": 4.601747512817383, |
|
"learning_rate": 1.912313432835821e-05, |
|
"loss": 3.278, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.7835820895522388, |
|
"grad_norm": 6.704329013824463, |
|
"learning_rate": 1.9589552238805972e-05, |
|
"loss": 3.3873, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.8022388059701493, |
|
"grad_norm": 5.003252983093262, |
|
"learning_rate": 2.0055970149253735e-05, |
|
"loss": 3.3281, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 5.126171588897705, |
|
"learning_rate": 2.0522388059701493e-05, |
|
"loss": 3.2048, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.8395522388059702, |
|
"grad_norm": 5.796511173248291, |
|
"learning_rate": 2.0988805970149256e-05, |
|
"loss": 3.2639, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.8582089552238806, |
|
"grad_norm": 5.77567720413208, |
|
"learning_rate": 2.1455223880597018e-05, |
|
"loss": 3.2442, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8768656716417911, |
|
"grad_norm": 5.419305324554443, |
|
"learning_rate": 2.1921641791044777e-05, |
|
"loss": 3.123, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 5.75105619430542, |
|
"learning_rate": 2.238805970149254e-05, |
|
"loss": 3.0977, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.914179104477612, |
|
"grad_norm": 8.254322052001953, |
|
"learning_rate": 2.28544776119403e-05, |
|
"loss": 3.086, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.9328358208955224, |
|
"grad_norm": 6.343637943267822, |
|
"learning_rate": 2.332089552238806e-05, |
|
"loss": 3.3336, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9514925373134329, |
|
"grad_norm": 5.593357563018799, |
|
"learning_rate": 2.3787313432835822e-05, |
|
"loss": 3.2817, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 8.842999458312988, |
|
"learning_rate": 2.4253731343283584e-05, |
|
"loss": 3.1676, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.9888059701492538, |
|
"grad_norm": 8.3274507522583, |
|
"learning_rate": 2.4720149253731347e-05, |
|
"loss": 3.1462, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.155, |
|
"eval_f1_macro": 0.09100175187131708, |
|
"eval_f1_micro": 0.155, |
|
"eval_f1_weighted": 0.08955112279025322, |
|
"eval_loss": 3.2174994945526123, |
|
"eval_precision_macro": 0.07134410305142012, |
|
"eval_precision_micro": 0.155, |
|
"eval_precision_weighted": 0.07162192482314433, |
|
"eval_recall_macro": 0.16519607843137257, |
|
"eval_recall_micro": 0.155, |
|
"eval_recall_weighted": 0.155, |
|
"eval_runtime": 16.5066, |
|
"eval_samples_per_second": 12.116, |
|
"eval_steps_per_second": 0.788, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.007462686567164, |
|
"grad_norm": 6.375778675079346, |
|
"learning_rate": 2.5186567164179102e-05, |
|
"loss": 3.0937, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.0261194029850746, |
|
"grad_norm": 9.297365188598633, |
|
"learning_rate": 2.5652985074626868e-05, |
|
"loss": 3.0123, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.044776119402985, |
|
"grad_norm": 6.8317646980285645, |
|
"learning_rate": 2.6119402985074626e-05, |
|
"loss": 3.0876, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0634328358208955, |
|
"grad_norm": 6.641528606414795, |
|
"learning_rate": 2.658582089552239e-05, |
|
"loss": 2.9694, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.0820895522388059, |
|
"grad_norm": 6.846834659576416, |
|
"learning_rate": 2.7052238805970147e-05, |
|
"loss": 3.0272, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.1007462686567164, |
|
"grad_norm": 6.250601291656494, |
|
"learning_rate": 2.7518656716417913e-05, |
|
"loss": 2.9646, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 6.11965274810791, |
|
"learning_rate": 2.7985074626865672e-05, |
|
"loss": 2.8305, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1380597014925373, |
|
"grad_norm": 7.502304553985596, |
|
"learning_rate": 2.8451492537313434e-05, |
|
"loss": 2.7931, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.1567164179104479, |
|
"grad_norm": 6.00736665725708, |
|
"learning_rate": 2.8917910447761193e-05, |
|
"loss": 2.8108, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.1753731343283582, |
|
"grad_norm": 7.524813175201416, |
|
"learning_rate": 2.9384328358208955e-05, |
|
"loss": 2.8187, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 7.996919631958008, |
|
"learning_rate": 2.9850746268656714e-05, |
|
"loss": 2.8544, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.212686567164179, |
|
"grad_norm": 7.103128910064697, |
|
"learning_rate": 3.031716417910448e-05, |
|
"loss": 2.8545, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.2313432835820897, |
|
"grad_norm": 5.705358505249023, |
|
"learning_rate": 3.078358208955224e-05, |
|
"loss": 2.5292, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 6.714156150817871, |
|
"learning_rate": 3.125e-05, |
|
"loss": 2.5716, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.2686567164179103, |
|
"grad_norm": 7.388309955596924, |
|
"learning_rate": 3.171641791044776e-05, |
|
"loss": 2.8626, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.287313432835821, |
|
"grad_norm": 6.7939934730529785, |
|
"learning_rate": 3.2182835820895525e-05, |
|
"loss": 2.7215, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.3059701492537314, |
|
"grad_norm": 6.774124622344971, |
|
"learning_rate": 3.264925373134329e-05, |
|
"loss": 2.7824, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.3246268656716418, |
|
"grad_norm": 7.475742340087891, |
|
"learning_rate": 3.311567164179105e-05, |
|
"loss": 2.7855, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.3432835820895521, |
|
"grad_norm": 6.67882776260376, |
|
"learning_rate": 3.358208955223881e-05, |
|
"loss": 2.7686, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.3619402985074627, |
|
"grad_norm": 6.921255111694336, |
|
"learning_rate": 3.404850746268657e-05, |
|
"loss": 2.6471, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.3805970149253732, |
|
"grad_norm": 5.301546573638916, |
|
"learning_rate": 3.451492537313433e-05, |
|
"loss": 2.343, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.3992537313432836, |
|
"grad_norm": 8.802115440368652, |
|
"learning_rate": 3.498134328358209e-05, |
|
"loss": 2.6459, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 10.76266860961914, |
|
"learning_rate": 3.5447761194029854e-05, |
|
"loss": 2.245, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.4365671641791045, |
|
"grad_norm": 6.216732978820801, |
|
"learning_rate": 3.5914179104477616e-05, |
|
"loss": 2.1281, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.455223880597015, |
|
"grad_norm": 7.1129231452941895, |
|
"learning_rate": 3.638059701492538e-05, |
|
"loss": 2.6988, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.4738805970149254, |
|
"grad_norm": 6.50852108001709, |
|
"learning_rate": 3.6847014925373134e-05, |
|
"loss": 2.3069, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.4925373134328357, |
|
"grad_norm": 7.611819744110107, |
|
"learning_rate": 3.73134328358209e-05, |
|
"loss": 2.3428, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5111940298507462, |
|
"grad_norm": 8.59446907043457, |
|
"learning_rate": 3.777985074626866e-05, |
|
"loss": 2.278, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.5298507462686568, |
|
"grad_norm": 9.75235652923584, |
|
"learning_rate": 3.824626865671642e-05, |
|
"loss": 2.1313, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.5485074626865671, |
|
"grad_norm": 7.297292709350586, |
|
"learning_rate": 3.871268656716418e-05, |
|
"loss": 2.2863, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.5671641791044775, |
|
"grad_norm": 9.046923637390137, |
|
"learning_rate": 3.9179104477611945e-05, |
|
"loss": 2.5253, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.585820895522388, |
|
"grad_norm": 7.503407955169678, |
|
"learning_rate": 3.96455223880597e-05, |
|
"loss": 2.2857, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.6044776119402986, |
|
"grad_norm": 7.656423568725586, |
|
"learning_rate": 4.011194029850747e-05, |
|
"loss": 2.5456, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.623134328358209, |
|
"grad_norm": 7.632299423217773, |
|
"learning_rate": 4.0578358208955225e-05, |
|
"loss": 2.6645, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.6417910447761193, |
|
"grad_norm": 8.021852493286133, |
|
"learning_rate": 4.104477611940299e-05, |
|
"loss": 2.1135, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.6604477611940298, |
|
"grad_norm": 10.39423656463623, |
|
"learning_rate": 4.151119402985075e-05, |
|
"loss": 2.1937, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.6791044776119404, |
|
"grad_norm": 12.216695785522461, |
|
"learning_rate": 4.197761194029851e-05, |
|
"loss": 1.9825, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.6977611940298507, |
|
"grad_norm": 7.088036060333252, |
|
"learning_rate": 4.244402985074627e-05, |
|
"loss": 1.7742, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 7.268665313720703, |
|
"learning_rate": 4.2910447761194036e-05, |
|
"loss": 2.1474, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.7350746268656716, |
|
"grad_norm": 9.286911010742188, |
|
"learning_rate": 4.337686567164179e-05, |
|
"loss": 2.163, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.7537313432835822, |
|
"grad_norm": 10.1356782913208, |
|
"learning_rate": 4.384328358208955e-05, |
|
"loss": 2.0548, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.7723880597014925, |
|
"grad_norm": 10.506839752197266, |
|
"learning_rate": 4.4309701492537316e-05, |
|
"loss": 2.0674, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.7910447761194028, |
|
"grad_norm": 7.393703937530518, |
|
"learning_rate": 4.477611940298508e-05, |
|
"loss": 2.1359, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8097014925373134, |
|
"grad_norm": 8.782050132751465, |
|
"learning_rate": 4.524253731343284e-05, |
|
"loss": 2.0858, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.828358208955224, |
|
"grad_norm": 8.55767822265625, |
|
"learning_rate": 4.57089552238806e-05, |
|
"loss": 2.1815, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.8470149253731343, |
|
"grad_norm": 6.715104579925537, |
|
"learning_rate": 4.617537313432836e-05, |
|
"loss": 2.1025, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.8656716417910446, |
|
"grad_norm": 10.000481605529785, |
|
"learning_rate": 4.664179104477612e-05, |
|
"loss": 1.9817, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8843283582089554, |
|
"grad_norm": 7.614371299743652, |
|
"learning_rate": 4.710820895522388e-05, |
|
"loss": 2.2921, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.9029850746268657, |
|
"grad_norm": 7.8831658363342285, |
|
"learning_rate": 4.7574626865671644e-05, |
|
"loss": 1.6008, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.921641791044776, |
|
"grad_norm": 8.811071395874023, |
|
"learning_rate": 4.8041044776119407e-05, |
|
"loss": 2.131, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.9402985074626866, |
|
"grad_norm": 8.217741966247559, |
|
"learning_rate": 4.850746268656717e-05, |
|
"loss": 2.0112, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.9589552238805972, |
|
"grad_norm": 8.56834602355957, |
|
"learning_rate": 4.8973880597014924e-05, |
|
"loss": 1.8353, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.9776119402985075, |
|
"grad_norm": 12.720284461975098, |
|
"learning_rate": 4.944029850746269e-05, |
|
"loss": 1.5857, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.9962686567164178, |
|
"grad_norm": 10.004925727844238, |
|
"learning_rate": 4.990671641791045e-05, |
|
"loss": 2.0364, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.425, |
|
"eval_f1_macro": 0.34724867475550414, |
|
"eval_f1_micro": 0.425, |
|
"eval_f1_weighted": 0.35814368775359484, |
|
"eval_loss": 2.165677785873413, |
|
"eval_precision_macro": 0.36154654169360045, |
|
"eval_precision_micro": 0.425, |
|
"eval_precision_weighted": 0.3759306318681318, |
|
"eval_recall_macro": 0.423249299719888, |
|
"eval_recall_micro": 0.425, |
|
"eval_recall_weighted": 0.425, |
|
"eval_runtime": 22.9282, |
|
"eval_samples_per_second": 8.723, |
|
"eval_steps_per_second": 0.567, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.014925373134328, |
|
"grad_norm": 7.278439044952393, |
|
"learning_rate": 4.995854063018242e-05, |
|
"loss": 1.297, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.033582089552239, |
|
"grad_norm": 13.718267440795898, |
|
"learning_rate": 4.990671641791045e-05, |
|
"loss": 1.9014, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.0522388059701493, |
|
"grad_norm": 11.233081817626953, |
|
"learning_rate": 4.985489220563848e-05, |
|
"loss": 1.9761, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.0708955223880596, |
|
"grad_norm": 11.126688003540039, |
|
"learning_rate": 4.98030679933665e-05, |
|
"loss": 1.7989, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.08955223880597, |
|
"grad_norm": 10.091024398803711, |
|
"learning_rate": 4.975124378109453e-05, |
|
"loss": 1.4646, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.1082089552238807, |
|
"grad_norm": 8.601826667785645, |
|
"learning_rate": 4.9699419568822556e-05, |
|
"loss": 1.8275, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.126865671641791, |
|
"grad_norm": 12.649139404296875, |
|
"learning_rate": 4.964759535655058e-05, |
|
"loss": 1.5706, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.1455223880597014, |
|
"grad_norm": 12.75953483581543, |
|
"learning_rate": 4.959577114427861e-05, |
|
"loss": 1.8285, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.1641791044776117, |
|
"grad_norm": 9.071000099182129, |
|
"learning_rate": 4.954394693200663e-05, |
|
"loss": 1.6905, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.1828358208955225, |
|
"grad_norm": 9.372791290283203, |
|
"learning_rate": 4.949212271973466e-05, |
|
"loss": 1.8672, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.201492537313433, |
|
"grad_norm": 9.870670318603516, |
|
"learning_rate": 4.944029850746269e-05, |
|
"loss": 1.6721, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.220149253731343, |
|
"grad_norm": 7.243556976318359, |
|
"learning_rate": 4.9388474295190717e-05, |
|
"loss": 1.6255, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.2388059701492535, |
|
"grad_norm": 5.97638463973999, |
|
"learning_rate": 4.933665008291874e-05, |
|
"loss": 1.6361, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.2574626865671643, |
|
"grad_norm": 10.749024391174316, |
|
"learning_rate": 4.928482587064677e-05, |
|
"loss": 1.8244, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.2761194029850746, |
|
"grad_norm": 5.7718186378479, |
|
"learning_rate": 4.9233001658374794e-05, |
|
"loss": 1.2545, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.294776119402985, |
|
"grad_norm": 11.37286376953125, |
|
"learning_rate": 4.9181177446102824e-05, |
|
"loss": 1.7528, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.3134328358208958, |
|
"grad_norm": 10.693103790283203, |
|
"learning_rate": 4.912935323383085e-05, |
|
"loss": 1.6237, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.332089552238806, |
|
"grad_norm": 8.764638900756836, |
|
"learning_rate": 4.907752902155888e-05, |
|
"loss": 1.6663, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.3507462686567164, |
|
"grad_norm": 6.239055156707764, |
|
"learning_rate": 4.90257048092869e-05, |
|
"loss": 1.473, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.3694029850746268, |
|
"grad_norm": 12.444618225097656, |
|
"learning_rate": 4.8973880597014924e-05, |
|
"loss": 1.6951, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.388059701492537, |
|
"grad_norm": 10.495733261108398, |
|
"learning_rate": 4.8922056384742954e-05, |
|
"loss": 1.362, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.406716417910448, |
|
"grad_norm": 6.903144836425781, |
|
"learning_rate": 4.887023217247098e-05, |
|
"loss": 1.3483, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.425373134328358, |
|
"grad_norm": 8.9871826171875, |
|
"learning_rate": 4.881840796019901e-05, |
|
"loss": 1.4329, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.4440298507462686, |
|
"grad_norm": 12.040112495422363, |
|
"learning_rate": 4.876658374792704e-05, |
|
"loss": 1.7239, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.4626865671641793, |
|
"grad_norm": 12.604753494262695, |
|
"learning_rate": 4.8714759535655055e-05, |
|
"loss": 1.0981, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.4813432835820897, |
|
"grad_norm": 8.3511323928833, |
|
"learning_rate": 4.8662935323383085e-05, |
|
"loss": 1.5905, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 6.789451599121094, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.283, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.5186567164179103, |
|
"grad_norm": 6.545684337615967, |
|
"learning_rate": 4.855928689883914e-05, |
|
"loss": 1.3954, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.5373134328358207, |
|
"grad_norm": 11.606388092041016, |
|
"learning_rate": 4.850746268656717e-05, |
|
"loss": 1.6337, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.5559701492537314, |
|
"grad_norm": 6.834081172943115, |
|
"learning_rate": 4.845563847429519e-05, |
|
"loss": 1.0763, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.574626865671642, |
|
"grad_norm": 9.194738388061523, |
|
"learning_rate": 4.8403814262023216e-05, |
|
"loss": 1.3672, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.593283582089552, |
|
"grad_norm": 7.910974025726318, |
|
"learning_rate": 4.8351990049751246e-05, |
|
"loss": 1.2458, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.611940298507463, |
|
"grad_norm": 8.25942611694336, |
|
"learning_rate": 4.830016583747927e-05, |
|
"loss": 1.4986, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.6305970149253732, |
|
"grad_norm": 5.547841548919678, |
|
"learning_rate": 4.82483416252073e-05, |
|
"loss": 1.488, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.6492537313432836, |
|
"grad_norm": 8.00507640838623, |
|
"learning_rate": 4.819651741293533e-05, |
|
"loss": 1.5729, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.667910447761194, |
|
"grad_norm": 10.959121704101562, |
|
"learning_rate": 4.814469320066335e-05, |
|
"loss": 1.393, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.6865671641791042, |
|
"grad_norm": 12.941998481750488, |
|
"learning_rate": 4.8092868988391376e-05, |
|
"loss": 1.6623, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.705223880597015, |
|
"grad_norm": 10.971231460571289, |
|
"learning_rate": 4.8041044776119407e-05, |
|
"loss": 1.2405, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.7238805970149254, |
|
"grad_norm": 6.511988639831543, |
|
"learning_rate": 4.798922056384743e-05, |
|
"loss": 1.4053, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.7425373134328357, |
|
"grad_norm": 16.730091094970703, |
|
"learning_rate": 4.793739635157546e-05, |
|
"loss": 1.6276, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.7611940298507465, |
|
"grad_norm": 8.781760215759277, |
|
"learning_rate": 4.7885572139303484e-05, |
|
"loss": 1.4323, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.779850746268657, |
|
"grad_norm": 8.811301231384277, |
|
"learning_rate": 4.7833747927031514e-05, |
|
"loss": 1.3947, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.798507462686567, |
|
"grad_norm": 6.631250381469727, |
|
"learning_rate": 4.778192371475954e-05, |
|
"loss": 1.3312, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.8171641791044775, |
|
"grad_norm": 13.198403358459473, |
|
"learning_rate": 4.773009950248756e-05, |
|
"loss": 1.3845, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.835820895522388, |
|
"grad_norm": 13.531638145446777, |
|
"learning_rate": 4.767827529021559e-05, |
|
"loss": 1.6808, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.8544776119402986, |
|
"grad_norm": 8.421935081481934, |
|
"learning_rate": 4.762645107794362e-05, |
|
"loss": 1.3985, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.873134328358209, |
|
"grad_norm": 11.863781929016113, |
|
"learning_rate": 4.7574626865671644e-05, |
|
"loss": 1.3896, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.8917910447761193, |
|
"grad_norm": 12.868813514709473, |
|
"learning_rate": 4.7522802653399674e-05, |
|
"loss": 1.5119, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.91044776119403, |
|
"grad_norm": 9.192615509033203, |
|
"learning_rate": 4.74709784411277e-05, |
|
"loss": 1.2569, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.9291044776119404, |
|
"grad_norm": 6.196342945098877, |
|
"learning_rate": 4.741915422885572e-05, |
|
"loss": 1.0811, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.9477611940298507, |
|
"grad_norm": 13.614799499511719, |
|
"learning_rate": 4.736733001658375e-05, |
|
"loss": 1.298, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.966417910447761, |
|
"grad_norm": 8.10826301574707, |
|
"learning_rate": 4.7315505804311775e-05, |
|
"loss": 1.152, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.9850746268656714, |
|
"grad_norm": 15.173196792602539, |
|
"learning_rate": 4.7263681592039805e-05, |
|
"loss": 1.5199, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.545, |
|
"eval_f1_macro": 0.4674843775735156, |
|
"eval_f1_micro": 0.545, |
|
"eval_f1_weighted": 0.4795885366054804, |
|
"eval_loss": 1.6110626459121704, |
|
"eval_precision_macro": 0.48627026568203036, |
|
"eval_precision_micro": 0.545, |
|
"eval_precision_weighted": 0.5079666305916306, |
|
"eval_recall_macro": 0.5418767507002801, |
|
"eval_recall_micro": 0.545, |
|
"eval_recall_weighted": 0.545, |
|
"eval_runtime": 16.2687, |
|
"eval_samples_per_second": 12.294, |
|
"eval_steps_per_second": 0.799, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 3.003731343283582, |
|
"grad_norm": 6.815779209136963, |
|
"learning_rate": 4.7211857379767835e-05, |
|
"loss": 0.8911, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 3.0223880597014925, |
|
"grad_norm": 8.526626586914062, |
|
"learning_rate": 4.716003316749585e-05, |
|
"loss": 1.2486, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.041044776119403, |
|
"grad_norm": 13.612601280212402, |
|
"learning_rate": 4.710820895522388e-05, |
|
"loss": 1.0458, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 3.0597014925373136, |
|
"grad_norm": 11.703157424926758, |
|
"learning_rate": 4.705638474295191e-05, |
|
"loss": 1.0918, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.078358208955224, |
|
"grad_norm": 12.684816360473633, |
|
"learning_rate": 4.7004560530679936e-05, |
|
"loss": 1.3883, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.0970149253731343, |
|
"grad_norm": 6.267882823944092, |
|
"learning_rate": 4.6952736318407966e-05, |
|
"loss": 0.951, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.1156716417910446, |
|
"grad_norm": 6.348490238189697, |
|
"learning_rate": 4.690091210613599e-05, |
|
"loss": 1.1257, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 3.1343283582089554, |
|
"grad_norm": 15.914472579956055, |
|
"learning_rate": 4.684908789386401e-05, |
|
"loss": 1.2884, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.1529850746268657, |
|
"grad_norm": 8.568007469177246, |
|
"learning_rate": 4.679726368159204e-05, |
|
"loss": 1.1261, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 3.171641791044776, |
|
"grad_norm": 12.23885440826416, |
|
"learning_rate": 4.6745439469320066e-05, |
|
"loss": 1.0198, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.1902985074626864, |
|
"grad_norm": 18.287586212158203, |
|
"learning_rate": 4.6693615257048096e-05, |
|
"loss": 0.9699, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 3.208955223880597, |
|
"grad_norm": 6.615917682647705, |
|
"learning_rate": 4.664179104477612e-05, |
|
"loss": 1.0768, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.2276119402985075, |
|
"grad_norm": 4.780862808227539, |
|
"learning_rate": 4.658996683250415e-05, |
|
"loss": 0.8213, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 3.246268656716418, |
|
"grad_norm": 12.68826675415039, |
|
"learning_rate": 4.6538142620232173e-05, |
|
"loss": 1.1507, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.264925373134328, |
|
"grad_norm": 6.729078769683838, |
|
"learning_rate": 4.64863184079602e-05, |
|
"loss": 1.0932, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.283582089552239, |
|
"grad_norm": 7.569777488708496, |
|
"learning_rate": 4.643449419568823e-05, |
|
"loss": 1.0546, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.3022388059701493, |
|
"grad_norm": 11.058488845825195, |
|
"learning_rate": 4.638266998341626e-05, |
|
"loss": 1.1409, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 3.3208955223880596, |
|
"grad_norm": 3.7507197856903076, |
|
"learning_rate": 4.633084577114428e-05, |
|
"loss": 0.8494, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.33955223880597, |
|
"grad_norm": 7.260571479797363, |
|
"learning_rate": 4.627902155887231e-05, |
|
"loss": 0.9837, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 3.3582089552238807, |
|
"grad_norm": 10.02383041381836, |
|
"learning_rate": 4.6227197346600334e-05, |
|
"loss": 1.1109, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.376865671641791, |
|
"grad_norm": 13.254716873168945, |
|
"learning_rate": 4.617537313432836e-05, |
|
"loss": 1.1479, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 3.3955223880597014, |
|
"grad_norm": 10.84252643585205, |
|
"learning_rate": 4.612354892205639e-05, |
|
"loss": 1.5116, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.4141791044776117, |
|
"grad_norm": 9.479592323303223, |
|
"learning_rate": 4.607172470978441e-05, |
|
"loss": 1.3419, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 3.4328358208955225, |
|
"grad_norm": 11.758341789245605, |
|
"learning_rate": 4.601990049751244e-05, |
|
"loss": 1.0607, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.451492537313433, |
|
"grad_norm": 12.16956901550293, |
|
"learning_rate": 4.596807628524047e-05, |
|
"loss": 1.2986, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 3.470149253731343, |
|
"grad_norm": 16.24034309387207, |
|
"learning_rate": 4.591625207296849e-05, |
|
"loss": 0.9493, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.4888059701492535, |
|
"grad_norm": 5.806056499481201, |
|
"learning_rate": 4.586442786069652e-05, |
|
"loss": 0.854, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 3.5074626865671643, |
|
"grad_norm": 31.129661560058594, |
|
"learning_rate": 4.581260364842455e-05, |
|
"loss": 0.9267, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.5261194029850746, |
|
"grad_norm": 10.927783012390137, |
|
"learning_rate": 4.576077943615257e-05, |
|
"loss": 0.9969, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 3.544776119402985, |
|
"grad_norm": 19.682039260864258, |
|
"learning_rate": 4.57089552238806e-05, |
|
"loss": 0.9038, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.5634328358208958, |
|
"grad_norm": 12.5592679977417, |
|
"learning_rate": 4.5657131011608626e-05, |
|
"loss": 1.1389, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 3.582089552238806, |
|
"grad_norm": 10.561615943908691, |
|
"learning_rate": 4.560530679933665e-05, |
|
"loss": 0.8216, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.6007462686567164, |
|
"grad_norm": 6.979230880737305, |
|
"learning_rate": 4.555348258706468e-05, |
|
"loss": 0.968, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 3.6194029850746268, |
|
"grad_norm": 8.147384643554688, |
|
"learning_rate": 4.55016583747927e-05, |
|
"loss": 1.0937, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.638059701492537, |
|
"grad_norm": 14.14340591430664, |
|
"learning_rate": 4.544983416252073e-05, |
|
"loss": 1.1813, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 3.656716417910448, |
|
"grad_norm": 10.942938804626465, |
|
"learning_rate": 4.539800995024876e-05, |
|
"loss": 0.9468, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.675373134328358, |
|
"grad_norm": 7.3539838790893555, |
|
"learning_rate": 4.5346185737976786e-05, |
|
"loss": 0.7761, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 3.6940298507462686, |
|
"grad_norm": 5.7556843757629395, |
|
"learning_rate": 4.529436152570481e-05, |
|
"loss": 0.848, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.7126865671641793, |
|
"grad_norm": 7.527775287628174, |
|
"learning_rate": 4.524253731343284e-05, |
|
"loss": 0.9969, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 3.7313432835820897, |
|
"grad_norm": 3.0273733139038086, |
|
"learning_rate": 4.5190713101160863e-05, |
|
"loss": 0.6834, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 4.489946365356445, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.6737, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 3.7686567164179103, |
|
"grad_norm": 14.911623001098633, |
|
"learning_rate": 4.508706467661692e-05, |
|
"loss": 0.7317, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.7873134328358207, |
|
"grad_norm": 18.18294906616211, |
|
"learning_rate": 4.503524046434495e-05, |
|
"loss": 1.1278, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 3.8059701492537314, |
|
"grad_norm": 19.4583797454834, |
|
"learning_rate": 4.498341625207297e-05, |
|
"loss": 1.29, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.824626865671642, |
|
"grad_norm": 5.924612522125244, |
|
"learning_rate": 4.4931592039800994e-05, |
|
"loss": 0.6149, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.843283582089552, |
|
"grad_norm": 9.307779312133789, |
|
"learning_rate": 4.4879767827529024e-05, |
|
"loss": 0.7793, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.861940298507463, |
|
"grad_norm": 4.041577339172363, |
|
"learning_rate": 4.482794361525705e-05, |
|
"loss": 0.5555, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 3.8805970149253732, |
|
"grad_norm": 12.132710456848145, |
|
"learning_rate": 4.477611940298508e-05, |
|
"loss": 1.5825, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.8992537313432836, |
|
"grad_norm": 11.172629356384277, |
|
"learning_rate": 4.472429519071311e-05, |
|
"loss": 1.2408, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 3.917910447761194, |
|
"grad_norm": 9.551191329956055, |
|
"learning_rate": 4.4672470978441125e-05, |
|
"loss": 1.2186, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.9365671641791042, |
|
"grad_norm": 6.64059591293335, |
|
"learning_rate": 4.4620646766169155e-05, |
|
"loss": 0.9661, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 3.955223880597015, |
|
"grad_norm": 11.107884407043457, |
|
"learning_rate": 4.4568822553897185e-05, |
|
"loss": 0.8848, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.9738805970149254, |
|
"grad_norm": 14.161197662353516, |
|
"learning_rate": 4.451699834162521e-05, |
|
"loss": 1.0005, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 3.9925373134328357, |
|
"grad_norm": 6.025814533233643, |
|
"learning_rate": 4.446517412935324e-05, |
|
"loss": 0.7212, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.685, |
|
"eval_f1_macro": 0.6471554397159933, |
|
"eval_f1_micro": 0.685, |
|
"eval_f1_weighted": 0.6601559323029912, |
|
"eval_loss": 1.3381353616714478, |
|
"eval_precision_macro": 0.6763538748832867, |
|
"eval_precision_micro": 0.685, |
|
"eval_precision_weighted": 0.6935892857142857, |
|
"eval_recall_macro": 0.6763305322128851, |
|
"eval_recall_micro": 0.685, |
|
"eval_recall_weighted": 0.685, |
|
"eval_runtime": 18.3933, |
|
"eval_samples_per_second": 10.873, |
|
"eval_steps_per_second": 0.707, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 4.0111940298507465, |
|
"grad_norm": 2.7835495471954346, |
|
"learning_rate": 4.441334991708126e-05, |
|
"loss": 0.6966, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 4.029850746268656, |
|
"grad_norm": 10.108399391174316, |
|
"learning_rate": 4.4361525704809285e-05, |
|
"loss": 1.1173, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.048507462686567, |
|
"grad_norm": 4.910768032073975, |
|
"learning_rate": 4.4309701492537316e-05, |
|
"loss": 0.6386, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 4.067164179104478, |
|
"grad_norm": 15.222392082214355, |
|
"learning_rate": 4.425787728026534e-05, |
|
"loss": 0.8312, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.085820895522388, |
|
"grad_norm": 5.983860969543457, |
|
"learning_rate": 4.420605306799337e-05, |
|
"loss": 0.5752, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 4.104477611940299, |
|
"grad_norm": 14.089235305786133, |
|
"learning_rate": 4.41542288557214e-05, |
|
"loss": 0.8314, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.123134328358209, |
|
"grad_norm": 6.404083251953125, |
|
"learning_rate": 4.410240464344942e-05, |
|
"loss": 0.9376, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 4.141791044776119, |
|
"grad_norm": 7.631033897399902, |
|
"learning_rate": 4.4050580431177446e-05, |
|
"loss": 0.8144, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.16044776119403, |
|
"grad_norm": 7.922863483428955, |
|
"learning_rate": 4.3998756218905476e-05, |
|
"loss": 0.5124, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 4.17910447761194, |
|
"grad_norm": 5.675821304321289, |
|
"learning_rate": 4.39469320066335e-05, |
|
"loss": 0.778, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.197761194029851, |
|
"grad_norm": 10.380525588989258, |
|
"learning_rate": 4.389510779436153e-05, |
|
"loss": 0.824, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 4.2164179104477615, |
|
"grad_norm": 10.827855110168457, |
|
"learning_rate": 4.384328358208955e-05, |
|
"loss": 0.7693, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.235074626865671, |
|
"grad_norm": 8.679563522338867, |
|
"learning_rate": 4.3791459369817584e-05, |
|
"loss": 0.7922, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 4.253731343283582, |
|
"grad_norm": 6.679693222045898, |
|
"learning_rate": 4.373963515754561e-05, |
|
"loss": 0.629, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.272388059701493, |
|
"grad_norm": 3.585435390472412, |
|
"learning_rate": 4.368781094527363e-05, |
|
"loss": 0.4874, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 4.291044776119403, |
|
"grad_norm": 16.56496238708496, |
|
"learning_rate": 4.363598673300166e-05, |
|
"loss": 0.9753, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.309701492537314, |
|
"grad_norm": 7.082263469696045, |
|
"learning_rate": 4.358416252072969e-05, |
|
"loss": 0.7295, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 4.3283582089552235, |
|
"grad_norm": 14.251839637756348, |
|
"learning_rate": 4.3532338308457714e-05, |
|
"loss": 0.4749, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.347014925373134, |
|
"grad_norm": 10.183996200561523, |
|
"learning_rate": 4.3480514096185744e-05, |
|
"loss": 0.9388, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 4.365671641791045, |
|
"grad_norm": 11.86069393157959, |
|
"learning_rate": 4.342868988391377e-05, |
|
"loss": 0.8015, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.384328358208955, |
|
"grad_norm": 8.476336479187012, |
|
"learning_rate": 4.337686567164179e-05, |
|
"loss": 0.8224, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 4.402985074626866, |
|
"grad_norm": 14.603717803955078, |
|
"learning_rate": 4.332504145936982e-05, |
|
"loss": 0.5892, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.4216417910447765, |
|
"grad_norm": 4.239728927612305, |
|
"learning_rate": 4.3273217247097845e-05, |
|
"loss": 0.5871, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 4.440298507462686, |
|
"grad_norm": 7.121862411499023, |
|
"learning_rate": 4.3221393034825875e-05, |
|
"loss": 0.6393, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.458955223880597, |
|
"grad_norm": 7.324426651000977, |
|
"learning_rate": 4.3169568822553905e-05, |
|
"loss": 0.6936, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 4.477611940298507, |
|
"grad_norm": 10.668092727661133, |
|
"learning_rate": 4.311774461028192e-05, |
|
"loss": 0.7822, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.496268656716418, |
|
"grad_norm": 8.423562049865723, |
|
"learning_rate": 4.306592039800995e-05, |
|
"loss": 0.6684, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 4.514925373134329, |
|
"grad_norm": 14.151291847229004, |
|
"learning_rate": 4.301409618573798e-05, |
|
"loss": 0.82, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.5335820895522385, |
|
"grad_norm": 8.986945152282715, |
|
"learning_rate": 4.2962271973466006e-05, |
|
"loss": 0.7146, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 4.552238805970149, |
|
"grad_norm": 15.039894104003906, |
|
"learning_rate": 4.2910447761194036e-05, |
|
"loss": 0.9461, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.57089552238806, |
|
"grad_norm": 14.114082336425781, |
|
"learning_rate": 4.285862354892206e-05, |
|
"loss": 0.7352, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 4.58955223880597, |
|
"grad_norm": 9.63955307006836, |
|
"learning_rate": 4.280679933665008e-05, |
|
"loss": 0.6686, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.608208955223881, |
|
"grad_norm": 9.240038871765137, |
|
"learning_rate": 4.275497512437811e-05, |
|
"loss": 0.966, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 4.6268656716417915, |
|
"grad_norm": 30.38275718688965, |
|
"learning_rate": 4.2703150912106136e-05, |
|
"loss": 0.6015, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.645522388059701, |
|
"grad_norm": 3.4407896995544434, |
|
"learning_rate": 4.2651326699834166e-05, |
|
"loss": 0.5758, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 4.664179104477612, |
|
"grad_norm": 2.6312341690063477, |
|
"learning_rate": 4.259950248756219e-05, |
|
"loss": 0.7366, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.682835820895522, |
|
"grad_norm": 5.770228862762451, |
|
"learning_rate": 4.254767827529022e-05, |
|
"loss": 0.5223, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 4.701492537313433, |
|
"grad_norm": 9.451111793518066, |
|
"learning_rate": 4.249585406301824e-05, |
|
"loss": 0.6489, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.720149253731344, |
|
"grad_norm": 9.007664680480957, |
|
"learning_rate": 4.244402985074627e-05, |
|
"loss": 0.6277, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 4.7388059701492535, |
|
"grad_norm": 11.020060539245605, |
|
"learning_rate": 4.23922056384743e-05, |
|
"loss": 0.5666, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.757462686567164, |
|
"grad_norm": 4.820965766906738, |
|
"learning_rate": 4.234038142620233e-05, |
|
"loss": 0.6379, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 4.776119402985074, |
|
"grad_norm": 7.916781902313232, |
|
"learning_rate": 4.228855721393035e-05, |
|
"loss": 0.7684, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.794776119402985, |
|
"grad_norm": 9.316835403442383, |
|
"learning_rate": 4.223673300165838e-05, |
|
"loss": 0.8076, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 4.813432835820896, |
|
"grad_norm": 11.205623626708984, |
|
"learning_rate": 4.2184908789386404e-05, |
|
"loss": 0.7485, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.832089552238806, |
|
"grad_norm": 9.189976692199707, |
|
"learning_rate": 4.213308457711443e-05, |
|
"loss": 0.8462, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 4.850746268656716, |
|
"grad_norm": 10.27412223815918, |
|
"learning_rate": 4.208126036484246e-05, |
|
"loss": 0.6273, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.869402985074627, |
|
"grad_norm": 9.638339042663574, |
|
"learning_rate": 4.202943615257048e-05, |
|
"loss": 0.7762, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 4.888059701492537, |
|
"grad_norm": 5.456606388092041, |
|
"learning_rate": 4.197761194029851e-05, |
|
"loss": 0.6509, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.906716417910448, |
|
"grad_norm": 8.314762115478516, |
|
"learning_rate": 4.192578772802654e-05, |
|
"loss": 0.644, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 4.925373134328359, |
|
"grad_norm": 5.1142072677612305, |
|
"learning_rate": 4.187396351575456e-05, |
|
"loss": 0.7036, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.9440298507462686, |
|
"grad_norm": 6.1798577308654785, |
|
"learning_rate": 4.182213930348259e-05, |
|
"loss": 0.4501, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 4.962686567164179, |
|
"grad_norm": 12.538804054260254, |
|
"learning_rate": 4.177031509121062e-05, |
|
"loss": 0.9835, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.981343283582089, |
|
"grad_norm": 11.114508628845215, |
|
"learning_rate": 4.171849087893864e-05, |
|
"loss": 0.4535, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 24.881195068359375, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5743, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.69, |
|
"eval_f1_macro": 0.6552111287405404, |
|
"eval_f1_micro": 0.69, |
|
"eval_f1_weighted": 0.6724256854256855, |
|
"eval_loss": 1.1938023567199707, |
|
"eval_precision_macro": 0.687453314659197, |
|
"eval_precision_micro": 0.69, |
|
"eval_precision_weighted": 0.7043214285714287, |
|
"eval_recall_macro": 0.6736694677871148, |
|
"eval_recall_micro": 0.69, |
|
"eval_recall_weighted": 0.69, |
|
"eval_runtime": 20.8959, |
|
"eval_samples_per_second": 9.571, |
|
"eval_steps_per_second": 0.622, |
|
"step": 1340 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 5360, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 354552510105600.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |