cuwfnguyen's picture
Upload folder using huggingface_hub
cfb558d verified
{
"best_metric": 1.1938023567199707,
"best_model_checkpoint": "autotrain-v2v7o-9tu3d/checkpoint-1340",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1340,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018656716417910446,
"grad_norm": 3.044177532196045,
"learning_rate": 4.6641791044776116e-07,
"loss": 3.5829,
"step": 5
},
{
"epoch": 0.03731343283582089,
"grad_norm": 3.1452090740203857,
"learning_rate": 9.328358208955223e-07,
"loss": 3.5707,
"step": 10
},
{
"epoch": 0.055970149253731345,
"grad_norm": 3.5051560401916504,
"learning_rate": 1.3992537313432837e-06,
"loss": 3.5587,
"step": 15
},
{
"epoch": 0.07462686567164178,
"grad_norm": 3.155848741531372,
"learning_rate": 1.8656716417910446e-06,
"loss": 3.5653,
"step": 20
},
{
"epoch": 0.09328358208955224,
"grad_norm": 3.025758743286133,
"learning_rate": 2.3320895522388064e-06,
"loss": 3.5639,
"step": 25
},
{
"epoch": 0.11194029850746269,
"grad_norm": 2.966825485229492,
"learning_rate": 2.7985074626865674e-06,
"loss": 3.5187,
"step": 30
},
{
"epoch": 0.13059701492537312,
"grad_norm": 2.870944023132324,
"learning_rate": 3.2649253731343283e-06,
"loss": 3.5382,
"step": 35
},
{
"epoch": 0.14925373134328357,
"grad_norm": 3.2777228355407715,
"learning_rate": 3.7313432835820893e-06,
"loss": 3.5311,
"step": 40
},
{
"epoch": 0.16791044776119404,
"grad_norm": 2.8916268348693848,
"learning_rate": 4.1977611940298515e-06,
"loss": 3.5348,
"step": 45
},
{
"epoch": 0.1865671641791045,
"grad_norm": 2.667680263519287,
"learning_rate": 4.664179104477613e-06,
"loss": 3.5209,
"step": 50
},
{
"epoch": 0.20522388059701493,
"grad_norm": 2.6136128902435303,
"learning_rate": 5.130597014925373e-06,
"loss": 3.5293,
"step": 55
},
{
"epoch": 0.22388059701492538,
"grad_norm": 3.2311267852783203,
"learning_rate": 5.597014925373135e-06,
"loss": 3.5536,
"step": 60
},
{
"epoch": 0.24253731343283583,
"grad_norm": 2.513153076171875,
"learning_rate": 6.063432835820896e-06,
"loss": 3.5286,
"step": 65
},
{
"epoch": 0.26119402985074625,
"grad_norm": 2.53420090675354,
"learning_rate": 6.529850746268657e-06,
"loss": 3.5305,
"step": 70
},
{
"epoch": 0.2798507462686567,
"grad_norm": 2.7235593795776367,
"learning_rate": 6.996268656716418e-06,
"loss": 3.5244,
"step": 75
},
{
"epoch": 0.29850746268656714,
"grad_norm": 2.621687889099121,
"learning_rate": 7.4626865671641785e-06,
"loss": 3.4981,
"step": 80
},
{
"epoch": 0.31716417910447764,
"grad_norm": 2.5116870403289795,
"learning_rate": 7.92910447761194e-06,
"loss": 3.5208,
"step": 85
},
{
"epoch": 0.3358208955223881,
"grad_norm": 2.6995580196380615,
"learning_rate": 8.395522388059703e-06,
"loss": 3.5069,
"step": 90
},
{
"epoch": 0.35447761194029853,
"grad_norm": 2.7225232124328613,
"learning_rate": 8.861940298507463e-06,
"loss": 3.517,
"step": 95
},
{
"epoch": 0.373134328358209,
"grad_norm": 2.9143073558807373,
"learning_rate": 9.328358208955226e-06,
"loss": 3.4939,
"step": 100
},
{
"epoch": 0.3917910447761194,
"grad_norm": 2.6115267276763916,
"learning_rate": 9.794776119402986e-06,
"loss": 3.4971,
"step": 105
},
{
"epoch": 0.41044776119402987,
"grad_norm": 2.9028003215789795,
"learning_rate": 1.0261194029850747e-05,
"loss": 3.5239,
"step": 110
},
{
"epoch": 0.4291044776119403,
"grad_norm": 2.7557551860809326,
"learning_rate": 1.0727611940298509e-05,
"loss": 3.4567,
"step": 115
},
{
"epoch": 0.44776119402985076,
"grad_norm": 3.2114148139953613,
"learning_rate": 1.119402985074627e-05,
"loss": 3.5006,
"step": 120
},
{
"epoch": 0.4664179104477612,
"grad_norm": 3.3313052654266357,
"learning_rate": 1.166044776119403e-05,
"loss": 3.4342,
"step": 125
},
{
"epoch": 0.48507462686567165,
"grad_norm": 3.8037962913513184,
"learning_rate": 1.2126865671641792e-05,
"loss": 3.5062,
"step": 130
},
{
"epoch": 0.503731343283582,
"grad_norm": 3.6135289669036865,
"learning_rate": 1.2593283582089551e-05,
"loss": 3.4769,
"step": 135
},
{
"epoch": 0.5223880597014925,
"grad_norm": 3.9982452392578125,
"learning_rate": 1.3059701492537313e-05,
"loss": 3.5184,
"step": 140
},
{
"epoch": 0.5410447761194029,
"grad_norm": 4.104573726654053,
"learning_rate": 1.3526119402985074e-05,
"loss": 3.4843,
"step": 145
},
{
"epoch": 0.5597014925373134,
"grad_norm": 4.863430976867676,
"learning_rate": 1.3992537313432836e-05,
"loss": 3.4106,
"step": 150
},
{
"epoch": 0.5783582089552238,
"grad_norm": 4.079490661621094,
"learning_rate": 1.4458955223880596e-05,
"loss": 3.4743,
"step": 155
},
{
"epoch": 0.5970149253731343,
"grad_norm": 3.870924949645996,
"learning_rate": 1.4925373134328357e-05,
"loss": 3.3934,
"step": 160
},
{
"epoch": 0.6156716417910447,
"grad_norm": 3.8417205810546875,
"learning_rate": 1.539179104477612e-05,
"loss": 3.4258,
"step": 165
},
{
"epoch": 0.6343283582089553,
"grad_norm": 4.541757583618164,
"learning_rate": 1.585820895522388e-05,
"loss": 3.4659,
"step": 170
},
{
"epoch": 0.6529850746268657,
"grad_norm": 4.8343424797058105,
"learning_rate": 1.6324626865671644e-05,
"loss": 3.4103,
"step": 175
},
{
"epoch": 0.6716417910447762,
"grad_norm": 5.048968315124512,
"learning_rate": 1.6791044776119406e-05,
"loss": 3.459,
"step": 180
},
{
"epoch": 0.6902985074626866,
"grad_norm": 4.90827751159668,
"learning_rate": 1.7257462686567165e-05,
"loss": 3.4152,
"step": 185
},
{
"epoch": 0.7089552238805971,
"grad_norm": 4.624119758605957,
"learning_rate": 1.7723880597014927e-05,
"loss": 3.4129,
"step": 190
},
{
"epoch": 0.7276119402985075,
"grad_norm": 4.981058120727539,
"learning_rate": 1.819029850746269e-05,
"loss": 3.4204,
"step": 195
},
{
"epoch": 0.746268656716418,
"grad_norm": 4.767482280731201,
"learning_rate": 1.865671641791045e-05,
"loss": 3.3169,
"step": 200
},
{
"epoch": 0.7649253731343284,
"grad_norm": 4.601747512817383,
"learning_rate": 1.912313432835821e-05,
"loss": 3.278,
"step": 205
},
{
"epoch": 0.7835820895522388,
"grad_norm": 6.704329013824463,
"learning_rate": 1.9589552238805972e-05,
"loss": 3.3873,
"step": 210
},
{
"epoch": 0.8022388059701493,
"grad_norm": 5.003252983093262,
"learning_rate": 2.0055970149253735e-05,
"loss": 3.3281,
"step": 215
},
{
"epoch": 0.8208955223880597,
"grad_norm": 5.126171588897705,
"learning_rate": 2.0522388059701493e-05,
"loss": 3.2048,
"step": 220
},
{
"epoch": 0.8395522388059702,
"grad_norm": 5.796511173248291,
"learning_rate": 2.0988805970149256e-05,
"loss": 3.2639,
"step": 225
},
{
"epoch": 0.8582089552238806,
"grad_norm": 5.77567720413208,
"learning_rate": 2.1455223880597018e-05,
"loss": 3.2442,
"step": 230
},
{
"epoch": 0.8768656716417911,
"grad_norm": 5.419305324554443,
"learning_rate": 2.1921641791044777e-05,
"loss": 3.123,
"step": 235
},
{
"epoch": 0.8955223880597015,
"grad_norm": 5.75105619430542,
"learning_rate": 2.238805970149254e-05,
"loss": 3.0977,
"step": 240
},
{
"epoch": 0.914179104477612,
"grad_norm": 8.254322052001953,
"learning_rate": 2.28544776119403e-05,
"loss": 3.086,
"step": 245
},
{
"epoch": 0.9328358208955224,
"grad_norm": 6.343637943267822,
"learning_rate": 2.332089552238806e-05,
"loss": 3.3336,
"step": 250
},
{
"epoch": 0.9514925373134329,
"grad_norm": 5.593357563018799,
"learning_rate": 2.3787313432835822e-05,
"loss": 3.2817,
"step": 255
},
{
"epoch": 0.9701492537313433,
"grad_norm": 8.842999458312988,
"learning_rate": 2.4253731343283584e-05,
"loss": 3.1676,
"step": 260
},
{
"epoch": 0.9888059701492538,
"grad_norm": 8.3274507522583,
"learning_rate": 2.4720149253731347e-05,
"loss": 3.1462,
"step": 265
},
{
"epoch": 1.0,
"eval_accuracy": 0.155,
"eval_f1_macro": 0.09100175187131708,
"eval_f1_micro": 0.155,
"eval_f1_weighted": 0.08955112279025322,
"eval_loss": 3.2174994945526123,
"eval_precision_macro": 0.07134410305142012,
"eval_precision_micro": 0.155,
"eval_precision_weighted": 0.07162192482314433,
"eval_recall_macro": 0.16519607843137257,
"eval_recall_micro": 0.155,
"eval_recall_weighted": 0.155,
"eval_runtime": 16.5066,
"eval_samples_per_second": 12.116,
"eval_steps_per_second": 0.788,
"step": 268
},
{
"epoch": 1.007462686567164,
"grad_norm": 6.375778675079346,
"learning_rate": 2.5186567164179102e-05,
"loss": 3.0937,
"step": 270
},
{
"epoch": 1.0261194029850746,
"grad_norm": 9.297365188598633,
"learning_rate": 2.5652985074626868e-05,
"loss": 3.0123,
"step": 275
},
{
"epoch": 1.044776119402985,
"grad_norm": 6.8317646980285645,
"learning_rate": 2.6119402985074626e-05,
"loss": 3.0876,
"step": 280
},
{
"epoch": 1.0634328358208955,
"grad_norm": 6.641528606414795,
"learning_rate": 2.658582089552239e-05,
"loss": 2.9694,
"step": 285
},
{
"epoch": 1.0820895522388059,
"grad_norm": 6.846834659576416,
"learning_rate": 2.7052238805970147e-05,
"loss": 3.0272,
"step": 290
},
{
"epoch": 1.1007462686567164,
"grad_norm": 6.250601291656494,
"learning_rate": 2.7518656716417913e-05,
"loss": 2.9646,
"step": 295
},
{
"epoch": 1.1194029850746268,
"grad_norm": 6.11965274810791,
"learning_rate": 2.7985074626865672e-05,
"loss": 2.8305,
"step": 300
},
{
"epoch": 1.1380597014925373,
"grad_norm": 7.502304553985596,
"learning_rate": 2.8451492537313434e-05,
"loss": 2.7931,
"step": 305
},
{
"epoch": 1.1567164179104479,
"grad_norm": 6.00736665725708,
"learning_rate": 2.8917910447761193e-05,
"loss": 2.8108,
"step": 310
},
{
"epoch": 1.1753731343283582,
"grad_norm": 7.524813175201416,
"learning_rate": 2.9384328358208955e-05,
"loss": 2.8187,
"step": 315
},
{
"epoch": 1.1940298507462686,
"grad_norm": 7.996919631958008,
"learning_rate": 2.9850746268656714e-05,
"loss": 2.8544,
"step": 320
},
{
"epoch": 1.212686567164179,
"grad_norm": 7.103128910064697,
"learning_rate": 3.031716417910448e-05,
"loss": 2.8545,
"step": 325
},
{
"epoch": 1.2313432835820897,
"grad_norm": 5.705358505249023,
"learning_rate": 3.078358208955224e-05,
"loss": 2.5292,
"step": 330
},
{
"epoch": 1.25,
"grad_norm": 6.714156150817871,
"learning_rate": 3.125e-05,
"loss": 2.5716,
"step": 335
},
{
"epoch": 1.2686567164179103,
"grad_norm": 7.388309955596924,
"learning_rate": 3.171641791044776e-05,
"loss": 2.8626,
"step": 340
},
{
"epoch": 1.287313432835821,
"grad_norm": 6.7939934730529785,
"learning_rate": 3.2182835820895525e-05,
"loss": 2.7215,
"step": 345
},
{
"epoch": 1.3059701492537314,
"grad_norm": 6.774124622344971,
"learning_rate": 3.264925373134329e-05,
"loss": 2.7824,
"step": 350
},
{
"epoch": 1.3246268656716418,
"grad_norm": 7.475742340087891,
"learning_rate": 3.311567164179105e-05,
"loss": 2.7855,
"step": 355
},
{
"epoch": 1.3432835820895521,
"grad_norm": 6.67882776260376,
"learning_rate": 3.358208955223881e-05,
"loss": 2.7686,
"step": 360
},
{
"epoch": 1.3619402985074627,
"grad_norm": 6.921255111694336,
"learning_rate": 3.404850746268657e-05,
"loss": 2.6471,
"step": 365
},
{
"epoch": 1.3805970149253732,
"grad_norm": 5.301546573638916,
"learning_rate": 3.451492537313433e-05,
"loss": 2.343,
"step": 370
},
{
"epoch": 1.3992537313432836,
"grad_norm": 8.802115440368652,
"learning_rate": 3.498134328358209e-05,
"loss": 2.6459,
"step": 375
},
{
"epoch": 1.417910447761194,
"grad_norm": 10.76266860961914,
"learning_rate": 3.5447761194029854e-05,
"loss": 2.245,
"step": 380
},
{
"epoch": 1.4365671641791045,
"grad_norm": 6.216732978820801,
"learning_rate": 3.5914179104477616e-05,
"loss": 2.1281,
"step": 385
},
{
"epoch": 1.455223880597015,
"grad_norm": 7.1129231452941895,
"learning_rate": 3.638059701492538e-05,
"loss": 2.6988,
"step": 390
},
{
"epoch": 1.4738805970149254,
"grad_norm": 6.50852108001709,
"learning_rate": 3.6847014925373134e-05,
"loss": 2.3069,
"step": 395
},
{
"epoch": 1.4925373134328357,
"grad_norm": 7.611819744110107,
"learning_rate": 3.73134328358209e-05,
"loss": 2.3428,
"step": 400
},
{
"epoch": 1.5111940298507462,
"grad_norm": 8.59446907043457,
"learning_rate": 3.777985074626866e-05,
"loss": 2.278,
"step": 405
},
{
"epoch": 1.5298507462686568,
"grad_norm": 9.75235652923584,
"learning_rate": 3.824626865671642e-05,
"loss": 2.1313,
"step": 410
},
{
"epoch": 1.5485074626865671,
"grad_norm": 7.297292709350586,
"learning_rate": 3.871268656716418e-05,
"loss": 2.2863,
"step": 415
},
{
"epoch": 1.5671641791044775,
"grad_norm": 9.046923637390137,
"learning_rate": 3.9179104477611945e-05,
"loss": 2.5253,
"step": 420
},
{
"epoch": 1.585820895522388,
"grad_norm": 7.503407955169678,
"learning_rate": 3.96455223880597e-05,
"loss": 2.2857,
"step": 425
},
{
"epoch": 1.6044776119402986,
"grad_norm": 7.656423568725586,
"learning_rate": 4.011194029850747e-05,
"loss": 2.5456,
"step": 430
},
{
"epoch": 1.623134328358209,
"grad_norm": 7.632299423217773,
"learning_rate": 4.0578358208955225e-05,
"loss": 2.6645,
"step": 435
},
{
"epoch": 1.6417910447761193,
"grad_norm": 8.021852493286133,
"learning_rate": 4.104477611940299e-05,
"loss": 2.1135,
"step": 440
},
{
"epoch": 1.6604477611940298,
"grad_norm": 10.39423656463623,
"learning_rate": 4.151119402985075e-05,
"loss": 2.1937,
"step": 445
},
{
"epoch": 1.6791044776119404,
"grad_norm": 12.216695785522461,
"learning_rate": 4.197761194029851e-05,
"loss": 1.9825,
"step": 450
},
{
"epoch": 1.6977611940298507,
"grad_norm": 7.088036060333252,
"learning_rate": 4.244402985074627e-05,
"loss": 1.7742,
"step": 455
},
{
"epoch": 1.716417910447761,
"grad_norm": 7.268665313720703,
"learning_rate": 4.2910447761194036e-05,
"loss": 2.1474,
"step": 460
},
{
"epoch": 1.7350746268656716,
"grad_norm": 9.286911010742188,
"learning_rate": 4.337686567164179e-05,
"loss": 2.163,
"step": 465
},
{
"epoch": 1.7537313432835822,
"grad_norm": 10.1356782913208,
"learning_rate": 4.384328358208955e-05,
"loss": 2.0548,
"step": 470
},
{
"epoch": 1.7723880597014925,
"grad_norm": 10.506839752197266,
"learning_rate": 4.4309701492537316e-05,
"loss": 2.0674,
"step": 475
},
{
"epoch": 1.7910447761194028,
"grad_norm": 7.393703937530518,
"learning_rate": 4.477611940298508e-05,
"loss": 2.1359,
"step": 480
},
{
"epoch": 1.8097014925373134,
"grad_norm": 8.782050132751465,
"learning_rate": 4.524253731343284e-05,
"loss": 2.0858,
"step": 485
},
{
"epoch": 1.828358208955224,
"grad_norm": 8.55767822265625,
"learning_rate": 4.57089552238806e-05,
"loss": 2.1815,
"step": 490
},
{
"epoch": 1.8470149253731343,
"grad_norm": 6.715104579925537,
"learning_rate": 4.617537313432836e-05,
"loss": 2.1025,
"step": 495
},
{
"epoch": 1.8656716417910446,
"grad_norm": 10.000481605529785,
"learning_rate": 4.664179104477612e-05,
"loss": 1.9817,
"step": 500
},
{
"epoch": 1.8843283582089554,
"grad_norm": 7.614371299743652,
"learning_rate": 4.710820895522388e-05,
"loss": 2.2921,
"step": 505
},
{
"epoch": 1.9029850746268657,
"grad_norm": 7.8831658363342285,
"learning_rate": 4.7574626865671644e-05,
"loss": 1.6008,
"step": 510
},
{
"epoch": 1.921641791044776,
"grad_norm": 8.811071395874023,
"learning_rate": 4.8041044776119407e-05,
"loss": 2.131,
"step": 515
},
{
"epoch": 1.9402985074626866,
"grad_norm": 8.217741966247559,
"learning_rate": 4.850746268656717e-05,
"loss": 2.0112,
"step": 520
},
{
"epoch": 1.9589552238805972,
"grad_norm": 8.56834602355957,
"learning_rate": 4.8973880597014924e-05,
"loss": 1.8353,
"step": 525
},
{
"epoch": 1.9776119402985075,
"grad_norm": 12.720284461975098,
"learning_rate": 4.944029850746269e-05,
"loss": 1.5857,
"step": 530
},
{
"epoch": 1.9962686567164178,
"grad_norm": 10.004925727844238,
"learning_rate": 4.990671641791045e-05,
"loss": 2.0364,
"step": 535
},
{
"epoch": 2.0,
"eval_accuracy": 0.425,
"eval_f1_macro": 0.34724867475550414,
"eval_f1_micro": 0.425,
"eval_f1_weighted": 0.35814368775359484,
"eval_loss": 2.165677785873413,
"eval_precision_macro": 0.36154654169360045,
"eval_precision_micro": 0.425,
"eval_precision_weighted": 0.3759306318681318,
"eval_recall_macro": 0.423249299719888,
"eval_recall_micro": 0.425,
"eval_recall_weighted": 0.425,
"eval_runtime": 22.9282,
"eval_samples_per_second": 8.723,
"eval_steps_per_second": 0.567,
"step": 536
},
{
"epoch": 2.014925373134328,
"grad_norm": 7.278439044952393,
"learning_rate": 4.995854063018242e-05,
"loss": 1.297,
"step": 540
},
{
"epoch": 2.033582089552239,
"grad_norm": 13.718267440795898,
"learning_rate": 4.990671641791045e-05,
"loss": 1.9014,
"step": 545
},
{
"epoch": 2.0522388059701493,
"grad_norm": 11.233081817626953,
"learning_rate": 4.985489220563848e-05,
"loss": 1.9761,
"step": 550
},
{
"epoch": 2.0708955223880596,
"grad_norm": 11.126688003540039,
"learning_rate": 4.98030679933665e-05,
"loss": 1.7989,
"step": 555
},
{
"epoch": 2.08955223880597,
"grad_norm": 10.091024398803711,
"learning_rate": 4.975124378109453e-05,
"loss": 1.4646,
"step": 560
},
{
"epoch": 2.1082089552238807,
"grad_norm": 8.601826667785645,
"learning_rate": 4.9699419568822556e-05,
"loss": 1.8275,
"step": 565
},
{
"epoch": 2.126865671641791,
"grad_norm": 12.649139404296875,
"learning_rate": 4.964759535655058e-05,
"loss": 1.5706,
"step": 570
},
{
"epoch": 2.1455223880597014,
"grad_norm": 12.75953483581543,
"learning_rate": 4.959577114427861e-05,
"loss": 1.8285,
"step": 575
},
{
"epoch": 2.1641791044776117,
"grad_norm": 9.071000099182129,
"learning_rate": 4.954394693200663e-05,
"loss": 1.6905,
"step": 580
},
{
"epoch": 2.1828358208955225,
"grad_norm": 9.372791290283203,
"learning_rate": 4.949212271973466e-05,
"loss": 1.8672,
"step": 585
},
{
"epoch": 2.201492537313433,
"grad_norm": 9.870670318603516,
"learning_rate": 4.944029850746269e-05,
"loss": 1.6721,
"step": 590
},
{
"epoch": 2.220149253731343,
"grad_norm": 7.243556976318359,
"learning_rate": 4.9388474295190717e-05,
"loss": 1.6255,
"step": 595
},
{
"epoch": 2.2388059701492535,
"grad_norm": 5.97638463973999,
"learning_rate": 4.933665008291874e-05,
"loss": 1.6361,
"step": 600
},
{
"epoch": 2.2574626865671643,
"grad_norm": 10.749024391174316,
"learning_rate": 4.928482587064677e-05,
"loss": 1.8244,
"step": 605
},
{
"epoch": 2.2761194029850746,
"grad_norm": 5.7718186378479,
"learning_rate": 4.9233001658374794e-05,
"loss": 1.2545,
"step": 610
},
{
"epoch": 2.294776119402985,
"grad_norm": 11.37286376953125,
"learning_rate": 4.9181177446102824e-05,
"loss": 1.7528,
"step": 615
},
{
"epoch": 2.3134328358208958,
"grad_norm": 10.693103790283203,
"learning_rate": 4.912935323383085e-05,
"loss": 1.6237,
"step": 620
},
{
"epoch": 2.332089552238806,
"grad_norm": 8.764638900756836,
"learning_rate": 4.907752902155888e-05,
"loss": 1.6663,
"step": 625
},
{
"epoch": 2.3507462686567164,
"grad_norm": 6.239055156707764,
"learning_rate": 4.90257048092869e-05,
"loss": 1.473,
"step": 630
},
{
"epoch": 2.3694029850746268,
"grad_norm": 12.444618225097656,
"learning_rate": 4.8973880597014924e-05,
"loss": 1.6951,
"step": 635
},
{
"epoch": 2.388059701492537,
"grad_norm": 10.495733261108398,
"learning_rate": 4.8922056384742954e-05,
"loss": 1.362,
"step": 640
},
{
"epoch": 2.406716417910448,
"grad_norm": 6.903144836425781,
"learning_rate": 4.887023217247098e-05,
"loss": 1.3483,
"step": 645
},
{
"epoch": 2.425373134328358,
"grad_norm": 8.9871826171875,
"learning_rate": 4.881840796019901e-05,
"loss": 1.4329,
"step": 650
},
{
"epoch": 2.4440298507462686,
"grad_norm": 12.040112495422363,
"learning_rate": 4.876658374792704e-05,
"loss": 1.7239,
"step": 655
},
{
"epoch": 2.4626865671641793,
"grad_norm": 12.604753494262695,
"learning_rate": 4.8714759535655055e-05,
"loss": 1.0981,
"step": 660
},
{
"epoch": 2.4813432835820897,
"grad_norm": 8.3511323928833,
"learning_rate": 4.8662935323383085e-05,
"loss": 1.5905,
"step": 665
},
{
"epoch": 2.5,
"grad_norm": 6.789451599121094,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.283,
"step": 670
},
{
"epoch": 2.5186567164179103,
"grad_norm": 6.545684337615967,
"learning_rate": 4.855928689883914e-05,
"loss": 1.3954,
"step": 675
},
{
"epoch": 2.5373134328358207,
"grad_norm": 11.606388092041016,
"learning_rate": 4.850746268656717e-05,
"loss": 1.6337,
"step": 680
},
{
"epoch": 2.5559701492537314,
"grad_norm": 6.834081172943115,
"learning_rate": 4.845563847429519e-05,
"loss": 1.0763,
"step": 685
},
{
"epoch": 2.574626865671642,
"grad_norm": 9.194738388061523,
"learning_rate": 4.8403814262023216e-05,
"loss": 1.3672,
"step": 690
},
{
"epoch": 2.593283582089552,
"grad_norm": 7.910974025726318,
"learning_rate": 4.8351990049751246e-05,
"loss": 1.2458,
"step": 695
},
{
"epoch": 2.611940298507463,
"grad_norm": 8.25942611694336,
"learning_rate": 4.830016583747927e-05,
"loss": 1.4986,
"step": 700
},
{
"epoch": 2.6305970149253732,
"grad_norm": 5.547841548919678,
"learning_rate": 4.82483416252073e-05,
"loss": 1.488,
"step": 705
},
{
"epoch": 2.6492537313432836,
"grad_norm": 8.00507640838623,
"learning_rate": 4.819651741293533e-05,
"loss": 1.5729,
"step": 710
},
{
"epoch": 2.667910447761194,
"grad_norm": 10.959121704101562,
"learning_rate": 4.814469320066335e-05,
"loss": 1.393,
"step": 715
},
{
"epoch": 2.6865671641791042,
"grad_norm": 12.941998481750488,
"learning_rate": 4.8092868988391376e-05,
"loss": 1.6623,
"step": 720
},
{
"epoch": 2.705223880597015,
"grad_norm": 10.971231460571289,
"learning_rate": 4.8041044776119407e-05,
"loss": 1.2405,
"step": 725
},
{
"epoch": 2.7238805970149254,
"grad_norm": 6.511988639831543,
"learning_rate": 4.798922056384743e-05,
"loss": 1.4053,
"step": 730
},
{
"epoch": 2.7425373134328357,
"grad_norm": 16.730091094970703,
"learning_rate": 4.793739635157546e-05,
"loss": 1.6276,
"step": 735
},
{
"epoch": 2.7611940298507465,
"grad_norm": 8.781760215759277,
"learning_rate": 4.7885572139303484e-05,
"loss": 1.4323,
"step": 740
},
{
"epoch": 2.779850746268657,
"grad_norm": 8.811301231384277,
"learning_rate": 4.7833747927031514e-05,
"loss": 1.3947,
"step": 745
},
{
"epoch": 2.798507462686567,
"grad_norm": 6.631250381469727,
"learning_rate": 4.778192371475954e-05,
"loss": 1.3312,
"step": 750
},
{
"epoch": 2.8171641791044775,
"grad_norm": 13.198403358459473,
"learning_rate": 4.773009950248756e-05,
"loss": 1.3845,
"step": 755
},
{
"epoch": 2.835820895522388,
"grad_norm": 13.531638145446777,
"learning_rate": 4.767827529021559e-05,
"loss": 1.6808,
"step": 760
},
{
"epoch": 2.8544776119402986,
"grad_norm": 8.421935081481934,
"learning_rate": 4.762645107794362e-05,
"loss": 1.3985,
"step": 765
},
{
"epoch": 2.873134328358209,
"grad_norm": 11.863781929016113,
"learning_rate": 4.7574626865671644e-05,
"loss": 1.3896,
"step": 770
},
{
"epoch": 2.8917910447761193,
"grad_norm": 12.868813514709473,
"learning_rate": 4.7522802653399674e-05,
"loss": 1.5119,
"step": 775
},
{
"epoch": 2.91044776119403,
"grad_norm": 9.192615509033203,
"learning_rate": 4.74709784411277e-05,
"loss": 1.2569,
"step": 780
},
{
"epoch": 2.9291044776119404,
"grad_norm": 6.196342945098877,
"learning_rate": 4.741915422885572e-05,
"loss": 1.0811,
"step": 785
},
{
"epoch": 2.9477611940298507,
"grad_norm": 13.614799499511719,
"learning_rate": 4.736733001658375e-05,
"loss": 1.298,
"step": 790
},
{
"epoch": 2.966417910447761,
"grad_norm": 8.10826301574707,
"learning_rate": 4.7315505804311775e-05,
"loss": 1.152,
"step": 795
},
{
"epoch": 2.9850746268656714,
"grad_norm": 15.173196792602539,
"learning_rate": 4.7263681592039805e-05,
"loss": 1.5199,
"step": 800
},
{
"epoch": 3.0,
"eval_accuracy": 0.545,
"eval_f1_macro": 0.4674843775735156,
"eval_f1_micro": 0.545,
"eval_f1_weighted": 0.4795885366054804,
"eval_loss": 1.6110626459121704,
"eval_precision_macro": 0.48627026568203036,
"eval_precision_micro": 0.545,
"eval_precision_weighted": 0.5079666305916306,
"eval_recall_macro": 0.5418767507002801,
"eval_recall_micro": 0.545,
"eval_recall_weighted": 0.545,
"eval_runtime": 16.2687,
"eval_samples_per_second": 12.294,
"eval_steps_per_second": 0.799,
"step": 804
},
{
"epoch": 3.003731343283582,
"grad_norm": 6.815779209136963,
"learning_rate": 4.7211857379767835e-05,
"loss": 0.8911,
"step": 805
},
{
"epoch": 3.0223880597014925,
"grad_norm": 8.526626586914062,
"learning_rate": 4.716003316749585e-05,
"loss": 1.2486,
"step": 810
},
{
"epoch": 3.041044776119403,
"grad_norm": 13.612601280212402,
"learning_rate": 4.710820895522388e-05,
"loss": 1.0458,
"step": 815
},
{
"epoch": 3.0597014925373136,
"grad_norm": 11.703157424926758,
"learning_rate": 4.705638474295191e-05,
"loss": 1.0918,
"step": 820
},
{
"epoch": 3.078358208955224,
"grad_norm": 12.684816360473633,
"learning_rate": 4.7004560530679936e-05,
"loss": 1.3883,
"step": 825
},
{
"epoch": 3.0970149253731343,
"grad_norm": 6.267882823944092,
"learning_rate": 4.6952736318407966e-05,
"loss": 0.951,
"step": 830
},
{
"epoch": 3.1156716417910446,
"grad_norm": 6.348490238189697,
"learning_rate": 4.690091210613599e-05,
"loss": 1.1257,
"step": 835
},
{
"epoch": 3.1343283582089554,
"grad_norm": 15.914472579956055,
"learning_rate": 4.684908789386401e-05,
"loss": 1.2884,
"step": 840
},
{
"epoch": 3.1529850746268657,
"grad_norm": 8.568007469177246,
"learning_rate": 4.679726368159204e-05,
"loss": 1.1261,
"step": 845
},
{
"epoch": 3.171641791044776,
"grad_norm": 12.23885440826416,
"learning_rate": 4.6745439469320066e-05,
"loss": 1.0198,
"step": 850
},
{
"epoch": 3.1902985074626864,
"grad_norm": 18.287586212158203,
"learning_rate": 4.6693615257048096e-05,
"loss": 0.9699,
"step": 855
},
{
"epoch": 3.208955223880597,
"grad_norm": 6.615917682647705,
"learning_rate": 4.664179104477612e-05,
"loss": 1.0768,
"step": 860
},
{
"epoch": 3.2276119402985075,
"grad_norm": 4.780862808227539,
"learning_rate": 4.658996683250415e-05,
"loss": 0.8213,
"step": 865
},
{
"epoch": 3.246268656716418,
"grad_norm": 12.68826675415039,
"learning_rate": 4.6538142620232173e-05,
"loss": 1.1507,
"step": 870
},
{
"epoch": 3.264925373134328,
"grad_norm": 6.729078769683838,
"learning_rate": 4.64863184079602e-05,
"loss": 1.0932,
"step": 875
},
{
"epoch": 3.283582089552239,
"grad_norm": 7.569777488708496,
"learning_rate": 4.643449419568823e-05,
"loss": 1.0546,
"step": 880
},
{
"epoch": 3.3022388059701493,
"grad_norm": 11.058488845825195,
"learning_rate": 4.638266998341626e-05,
"loss": 1.1409,
"step": 885
},
{
"epoch": 3.3208955223880596,
"grad_norm": 3.7507197856903076,
"learning_rate": 4.633084577114428e-05,
"loss": 0.8494,
"step": 890
},
{
"epoch": 3.33955223880597,
"grad_norm": 7.260571479797363,
"learning_rate": 4.627902155887231e-05,
"loss": 0.9837,
"step": 895
},
{
"epoch": 3.3582089552238807,
"grad_norm": 10.02383041381836,
"learning_rate": 4.6227197346600334e-05,
"loss": 1.1109,
"step": 900
},
{
"epoch": 3.376865671641791,
"grad_norm": 13.254716873168945,
"learning_rate": 4.617537313432836e-05,
"loss": 1.1479,
"step": 905
},
{
"epoch": 3.3955223880597014,
"grad_norm": 10.84252643585205,
"learning_rate": 4.612354892205639e-05,
"loss": 1.5116,
"step": 910
},
{
"epoch": 3.4141791044776117,
"grad_norm": 9.479592323303223,
"learning_rate": 4.607172470978441e-05,
"loss": 1.3419,
"step": 915
},
{
"epoch": 3.4328358208955225,
"grad_norm": 11.758341789245605,
"learning_rate": 4.601990049751244e-05,
"loss": 1.0607,
"step": 920
},
{
"epoch": 3.451492537313433,
"grad_norm": 12.16956901550293,
"learning_rate": 4.596807628524047e-05,
"loss": 1.2986,
"step": 925
},
{
"epoch": 3.470149253731343,
"grad_norm": 16.24034309387207,
"learning_rate": 4.591625207296849e-05,
"loss": 0.9493,
"step": 930
},
{
"epoch": 3.4888059701492535,
"grad_norm": 5.806056499481201,
"learning_rate": 4.586442786069652e-05,
"loss": 0.854,
"step": 935
},
{
"epoch": 3.5074626865671643,
"grad_norm": 31.129661560058594,
"learning_rate": 4.581260364842455e-05,
"loss": 0.9267,
"step": 940
},
{
"epoch": 3.5261194029850746,
"grad_norm": 10.927783012390137,
"learning_rate": 4.576077943615257e-05,
"loss": 0.9969,
"step": 945
},
{
"epoch": 3.544776119402985,
"grad_norm": 19.682039260864258,
"learning_rate": 4.57089552238806e-05,
"loss": 0.9038,
"step": 950
},
{
"epoch": 3.5634328358208958,
"grad_norm": 12.5592679977417,
"learning_rate": 4.5657131011608626e-05,
"loss": 1.1389,
"step": 955
},
{
"epoch": 3.582089552238806,
"grad_norm": 10.561615943908691,
"learning_rate": 4.560530679933665e-05,
"loss": 0.8216,
"step": 960
},
{
"epoch": 3.6007462686567164,
"grad_norm": 6.979230880737305,
"learning_rate": 4.555348258706468e-05,
"loss": 0.968,
"step": 965
},
{
"epoch": 3.6194029850746268,
"grad_norm": 8.147384643554688,
"learning_rate": 4.55016583747927e-05,
"loss": 1.0937,
"step": 970
},
{
"epoch": 3.638059701492537,
"grad_norm": 14.14340591430664,
"learning_rate": 4.544983416252073e-05,
"loss": 1.1813,
"step": 975
},
{
"epoch": 3.656716417910448,
"grad_norm": 10.942938804626465,
"learning_rate": 4.539800995024876e-05,
"loss": 0.9468,
"step": 980
},
{
"epoch": 3.675373134328358,
"grad_norm": 7.3539838790893555,
"learning_rate": 4.5346185737976786e-05,
"loss": 0.7761,
"step": 985
},
{
"epoch": 3.6940298507462686,
"grad_norm": 5.7556843757629395,
"learning_rate": 4.529436152570481e-05,
"loss": 0.848,
"step": 990
},
{
"epoch": 3.7126865671641793,
"grad_norm": 7.527775287628174,
"learning_rate": 4.524253731343284e-05,
"loss": 0.9969,
"step": 995
},
{
"epoch": 3.7313432835820897,
"grad_norm": 3.0273733139038086,
"learning_rate": 4.5190713101160863e-05,
"loss": 0.6834,
"step": 1000
},
{
"epoch": 3.75,
"grad_norm": 4.489946365356445,
"learning_rate": 4.5138888888888894e-05,
"loss": 0.6737,
"step": 1005
},
{
"epoch": 3.7686567164179103,
"grad_norm": 14.911623001098633,
"learning_rate": 4.508706467661692e-05,
"loss": 0.7317,
"step": 1010
},
{
"epoch": 3.7873134328358207,
"grad_norm": 18.18294906616211,
"learning_rate": 4.503524046434495e-05,
"loss": 1.1278,
"step": 1015
},
{
"epoch": 3.8059701492537314,
"grad_norm": 19.4583797454834,
"learning_rate": 4.498341625207297e-05,
"loss": 1.29,
"step": 1020
},
{
"epoch": 3.824626865671642,
"grad_norm": 5.924612522125244,
"learning_rate": 4.4931592039800994e-05,
"loss": 0.6149,
"step": 1025
},
{
"epoch": 3.843283582089552,
"grad_norm": 9.307779312133789,
"learning_rate": 4.4879767827529024e-05,
"loss": 0.7793,
"step": 1030
},
{
"epoch": 3.861940298507463,
"grad_norm": 4.041577339172363,
"learning_rate": 4.482794361525705e-05,
"loss": 0.5555,
"step": 1035
},
{
"epoch": 3.8805970149253732,
"grad_norm": 12.132710456848145,
"learning_rate": 4.477611940298508e-05,
"loss": 1.5825,
"step": 1040
},
{
"epoch": 3.8992537313432836,
"grad_norm": 11.172629356384277,
"learning_rate": 4.472429519071311e-05,
"loss": 1.2408,
"step": 1045
},
{
"epoch": 3.917910447761194,
"grad_norm": 9.551191329956055,
"learning_rate": 4.4672470978441125e-05,
"loss": 1.2186,
"step": 1050
},
{
"epoch": 3.9365671641791042,
"grad_norm": 6.64059591293335,
"learning_rate": 4.4620646766169155e-05,
"loss": 0.9661,
"step": 1055
},
{
"epoch": 3.955223880597015,
"grad_norm": 11.107884407043457,
"learning_rate": 4.4568822553897185e-05,
"loss": 0.8848,
"step": 1060
},
{
"epoch": 3.9738805970149254,
"grad_norm": 14.161197662353516,
"learning_rate": 4.451699834162521e-05,
"loss": 1.0005,
"step": 1065
},
{
"epoch": 3.9925373134328357,
"grad_norm": 6.025814533233643,
"learning_rate": 4.446517412935324e-05,
"loss": 0.7212,
"step": 1070
},
{
"epoch": 4.0,
"eval_accuracy": 0.685,
"eval_f1_macro": 0.6471554397159933,
"eval_f1_micro": 0.685,
"eval_f1_weighted": 0.6601559323029912,
"eval_loss": 1.3381353616714478,
"eval_precision_macro": 0.6763538748832867,
"eval_precision_micro": 0.685,
"eval_precision_weighted": 0.6935892857142857,
"eval_recall_macro": 0.6763305322128851,
"eval_recall_micro": 0.685,
"eval_recall_weighted": 0.685,
"eval_runtime": 18.3933,
"eval_samples_per_second": 10.873,
"eval_steps_per_second": 0.707,
"step": 1072
},
{
"epoch": 4.0111940298507465,
"grad_norm": 2.7835495471954346,
"learning_rate": 4.441334991708126e-05,
"loss": 0.6966,
"step": 1075
},
{
"epoch": 4.029850746268656,
"grad_norm": 10.108399391174316,
"learning_rate": 4.4361525704809285e-05,
"loss": 1.1173,
"step": 1080
},
{
"epoch": 4.048507462686567,
"grad_norm": 4.910768032073975,
"learning_rate": 4.4309701492537316e-05,
"loss": 0.6386,
"step": 1085
},
{
"epoch": 4.067164179104478,
"grad_norm": 15.222392082214355,
"learning_rate": 4.425787728026534e-05,
"loss": 0.8312,
"step": 1090
},
{
"epoch": 4.085820895522388,
"grad_norm": 5.983860969543457,
"learning_rate": 4.420605306799337e-05,
"loss": 0.5752,
"step": 1095
},
{
"epoch": 4.104477611940299,
"grad_norm": 14.089235305786133,
"learning_rate": 4.41542288557214e-05,
"loss": 0.8314,
"step": 1100
},
{
"epoch": 4.123134328358209,
"grad_norm": 6.404083251953125,
"learning_rate": 4.410240464344942e-05,
"loss": 0.9376,
"step": 1105
},
{
"epoch": 4.141791044776119,
"grad_norm": 7.631033897399902,
"learning_rate": 4.4050580431177446e-05,
"loss": 0.8144,
"step": 1110
},
{
"epoch": 4.16044776119403,
"grad_norm": 7.922863483428955,
"learning_rate": 4.3998756218905476e-05,
"loss": 0.5124,
"step": 1115
},
{
"epoch": 4.17910447761194,
"grad_norm": 5.675821304321289,
"learning_rate": 4.39469320066335e-05,
"loss": 0.778,
"step": 1120
},
{
"epoch": 4.197761194029851,
"grad_norm": 10.380525588989258,
"learning_rate": 4.389510779436153e-05,
"loss": 0.824,
"step": 1125
},
{
"epoch": 4.2164179104477615,
"grad_norm": 10.827855110168457,
"learning_rate": 4.384328358208955e-05,
"loss": 0.7693,
"step": 1130
},
{
"epoch": 4.235074626865671,
"grad_norm": 8.679563522338867,
"learning_rate": 4.3791459369817584e-05,
"loss": 0.7922,
"step": 1135
},
{
"epoch": 4.253731343283582,
"grad_norm": 6.679693222045898,
"learning_rate": 4.373963515754561e-05,
"loss": 0.629,
"step": 1140
},
{
"epoch": 4.272388059701493,
"grad_norm": 3.585435390472412,
"learning_rate": 4.368781094527363e-05,
"loss": 0.4874,
"step": 1145
},
{
"epoch": 4.291044776119403,
"grad_norm": 16.56496238708496,
"learning_rate": 4.363598673300166e-05,
"loss": 0.9753,
"step": 1150
},
{
"epoch": 4.309701492537314,
"grad_norm": 7.082263469696045,
"learning_rate": 4.358416252072969e-05,
"loss": 0.7295,
"step": 1155
},
{
"epoch": 4.3283582089552235,
"grad_norm": 14.251839637756348,
"learning_rate": 4.3532338308457714e-05,
"loss": 0.4749,
"step": 1160
},
{
"epoch": 4.347014925373134,
"grad_norm": 10.183996200561523,
"learning_rate": 4.3480514096185744e-05,
"loss": 0.9388,
"step": 1165
},
{
"epoch": 4.365671641791045,
"grad_norm": 11.86069393157959,
"learning_rate": 4.342868988391377e-05,
"loss": 0.8015,
"step": 1170
},
{
"epoch": 4.384328358208955,
"grad_norm": 8.476336479187012,
"learning_rate": 4.337686567164179e-05,
"loss": 0.8224,
"step": 1175
},
{
"epoch": 4.402985074626866,
"grad_norm": 14.603717803955078,
"learning_rate": 4.332504145936982e-05,
"loss": 0.5892,
"step": 1180
},
{
"epoch": 4.4216417910447765,
"grad_norm": 4.239728927612305,
"learning_rate": 4.3273217247097845e-05,
"loss": 0.5871,
"step": 1185
},
{
"epoch": 4.440298507462686,
"grad_norm": 7.121862411499023,
"learning_rate": 4.3221393034825875e-05,
"loss": 0.6393,
"step": 1190
},
{
"epoch": 4.458955223880597,
"grad_norm": 7.324426651000977,
"learning_rate": 4.3169568822553905e-05,
"loss": 0.6936,
"step": 1195
},
{
"epoch": 4.477611940298507,
"grad_norm": 10.668092727661133,
"learning_rate": 4.311774461028192e-05,
"loss": 0.7822,
"step": 1200
},
{
"epoch": 4.496268656716418,
"grad_norm": 8.423562049865723,
"learning_rate": 4.306592039800995e-05,
"loss": 0.6684,
"step": 1205
},
{
"epoch": 4.514925373134329,
"grad_norm": 14.151291847229004,
"learning_rate": 4.301409618573798e-05,
"loss": 0.82,
"step": 1210
},
{
"epoch": 4.5335820895522385,
"grad_norm": 8.986945152282715,
"learning_rate": 4.2962271973466006e-05,
"loss": 0.7146,
"step": 1215
},
{
"epoch": 4.552238805970149,
"grad_norm": 15.039894104003906,
"learning_rate": 4.2910447761194036e-05,
"loss": 0.9461,
"step": 1220
},
{
"epoch": 4.57089552238806,
"grad_norm": 14.114082336425781,
"learning_rate": 4.285862354892206e-05,
"loss": 0.7352,
"step": 1225
},
{
"epoch": 4.58955223880597,
"grad_norm": 9.63955307006836,
"learning_rate": 4.280679933665008e-05,
"loss": 0.6686,
"step": 1230
},
{
"epoch": 4.608208955223881,
"grad_norm": 9.240038871765137,
"learning_rate": 4.275497512437811e-05,
"loss": 0.966,
"step": 1235
},
{
"epoch": 4.6268656716417915,
"grad_norm": 30.38275718688965,
"learning_rate": 4.2703150912106136e-05,
"loss": 0.6015,
"step": 1240
},
{
"epoch": 4.645522388059701,
"grad_norm": 3.4407896995544434,
"learning_rate": 4.2651326699834166e-05,
"loss": 0.5758,
"step": 1245
},
{
"epoch": 4.664179104477612,
"grad_norm": 2.6312341690063477,
"learning_rate": 4.259950248756219e-05,
"loss": 0.7366,
"step": 1250
},
{
"epoch": 4.682835820895522,
"grad_norm": 5.770228862762451,
"learning_rate": 4.254767827529022e-05,
"loss": 0.5223,
"step": 1255
},
{
"epoch": 4.701492537313433,
"grad_norm": 9.451111793518066,
"learning_rate": 4.249585406301824e-05,
"loss": 0.6489,
"step": 1260
},
{
"epoch": 4.720149253731344,
"grad_norm": 9.007664680480957,
"learning_rate": 4.244402985074627e-05,
"loss": 0.6277,
"step": 1265
},
{
"epoch": 4.7388059701492535,
"grad_norm": 11.020060539245605,
"learning_rate": 4.23922056384743e-05,
"loss": 0.5666,
"step": 1270
},
{
"epoch": 4.757462686567164,
"grad_norm": 4.820965766906738,
"learning_rate": 4.234038142620233e-05,
"loss": 0.6379,
"step": 1275
},
{
"epoch": 4.776119402985074,
"grad_norm": 7.916781902313232,
"learning_rate": 4.228855721393035e-05,
"loss": 0.7684,
"step": 1280
},
{
"epoch": 4.794776119402985,
"grad_norm": 9.316835403442383,
"learning_rate": 4.223673300165838e-05,
"loss": 0.8076,
"step": 1285
},
{
"epoch": 4.813432835820896,
"grad_norm": 11.205623626708984,
"learning_rate": 4.2184908789386404e-05,
"loss": 0.7485,
"step": 1290
},
{
"epoch": 4.832089552238806,
"grad_norm": 9.189976692199707,
"learning_rate": 4.213308457711443e-05,
"loss": 0.8462,
"step": 1295
},
{
"epoch": 4.850746268656716,
"grad_norm": 10.27412223815918,
"learning_rate": 4.208126036484246e-05,
"loss": 0.6273,
"step": 1300
},
{
"epoch": 4.869402985074627,
"grad_norm": 9.638339042663574,
"learning_rate": 4.202943615257048e-05,
"loss": 0.7762,
"step": 1305
},
{
"epoch": 4.888059701492537,
"grad_norm": 5.456606388092041,
"learning_rate": 4.197761194029851e-05,
"loss": 0.6509,
"step": 1310
},
{
"epoch": 4.906716417910448,
"grad_norm": 8.314762115478516,
"learning_rate": 4.192578772802654e-05,
"loss": 0.644,
"step": 1315
},
{
"epoch": 4.925373134328359,
"grad_norm": 5.1142072677612305,
"learning_rate": 4.187396351575456e-05,
"loss": 0.7036,
"step": 1320
},
{
"epoch": 4.9440298507462686,
"grad_norm": 6.1798577308654785,
"learning_rate": 4.182213930348259e-05,
"loss": 0.4501,
"step": 1325
},
{
"epoch": 4.962686567164179,
"grad_norm": 12.538804054260254,
"learning_rate": 4.177031509121062e-05,
"loss": 0.9835,
"step": 1330
},
{
"epoch": 4.981343283582089,
"grad_norm": 11.114508628845215,
"learning_rate": 4.171849087893864e-05,
"loss": 0.4535,
"step": 1335
},
{
"epoch": 5.0,
"grad_norm": 24.881195068359375,
"learning_rate": 4.166666666666667e-05,
"loss": 0.5743,
"step": 1340
},
{
"epoch": 5.0,
"eval_accuracy": 0.69,
"eval_f1_macro": 0.6552111287405404,
"eval_f1_micro": 0.69,
"eval_f1_weighted": 0.6724256854256855,
"eval_loss": 1.1938023567199707,
"eval_precision_macro": 0.687453314659197,
"eval_precision_micro": 0.69,
"eval_precision_weighted": 0.7043214285714287,
"eval_recall_macro": 0.6736694677871148,
"eval_recall_micro": 0.69,
"eval_recall_weighted": 0.69,
"eval_runtime": 20.8959,
"eval_samples_per_second": 9.571,
"eval_steps_per_second": 0.622,
"step": 1340
}
],
"logging_steps": 5,
"max_steps": 5360,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 354552510105600.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}