lombardata's picture
Evaluation on the test set completed on 2024_09_03.
1e99df2 verified
raw
history blame
72.3 kB
{
"best_metric": 0.1308571696281433,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-small-2024_08_31-batch-size32_epochs150_freeze/checkpoint-36582",
"epoch": 144.0,
"eval_steps": 500,
"global_step": 39312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.19057519057519057,
"eval_f1_macro": 0.4058921954514261,
"eval_f1_micro": 0.7088941673264713,
"eval_loss": 0.19568666815757751,
"eval_roc_auc": 0.8060676064167129,
"eval_runtime": 426.0483,
"eval_samples_per_second": 6.774,
"eval_steps_per_second": 0.214,
"learning_rate": 0.001,
"step": 273
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.30737248063087463,
"learning_rate": 0.001,
"loss": 0.3189,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.21933471933471935,
"eval_f1_macro": 0.4867943512801917,
"eval_f1_micro": 0.738139514768845,
"eval_loss": 0.17198018729686737,
"eval_roc_auc": 0.8255075095586444,
"eval_runtime": 425.0166,
"eval_samples_per_second": 6.79,
"eval_steps_per_second": 0.214,
"learning_rate": 0.001,
"step": 546
},
{
"epoch": 3.0,
"eval_accuracy": 0.23215523215523215,
"eval_f1_macro": 0.5587016500092944,
"eval_f1_micro": 0.7578947368421052,
"eval_loss": 0.16209888458251953,
"eval_roc_auc": 0.8387630797560628,
"eval_runtime": 425.9119,
"eval_samples_per_second": 6.776,
"eval_steps_per_second": 0.214,
"learning_rate": 0.001,
"step": 819
},
{
"epoch": 3.663003663003663,
"grad_norm": 0.2619726359844208,
"learning_rate": 0.001,
"loss": 0.1897,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.22487872487872487,
"eval_f1_macro": 0.5561953540051209,
"eval_f1_micro": 0.7463059684835497,
"eval_loss": 0.15948981046676636,
"eval_roc_auc": 0.8221271753092407,
"eval_runtime": 423.9484,
"eval_samples_per_second": 6.807,
"eval_steps_per_second": 0.215,
"learning_rate": 0.001,
"step": 1092
},
{
"epoch": 5.0,
"eval_accuracy": 0.23146223146223147,
"eval_f1_macro": 0.5723046956548954,
"eval_f1_micro": 0.7510718113612004,
"eval_loss": 0.15691693127155304,
"eval_roc_auc": 0.8244935635420478,
"eval_runtime": 423.6041,
"eval_samples_per_second": 6.813,
"eval_steps_per_second": 0.215,
"learning_rate": 0.001,
"step": 1365
},
{
"epoch": 5.4945054945054945,
"grad_norm": 0.17114631831645966,
"learning_rate": 0.001,
"loss": 0.1808,
"step": 1500
},
{
"epoch": 6.0,
"eval_accuracy": 0.2363132363132363,
"eval_f1_macro": 0.5786669115862841,
"eval_f1_micro": 0.7634727923836142,
"eval_loss": 0.15302371978759766,
"eval_roc_auc": 0.8365257318814997,
"eval_runtime": 427.5566,
"eval_samples_per_second": 6.75,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 1638
},
{
"epoch": 7.0,
"eval_accuracy": 0.23354123354123354,
"eval_f1_macro": 0.5981729145672101,
"eval_f1_micro": 0.7651630269613162,
"eval_loss": 0.1523299366235733,
"eval_roc_auc": 0.838924594824006,
"eval_runtime": 430.1478,
"eval_samples_per_second": 6.709,
"eval_steps_per_second": 0.212,
"learning_rate": 0.001,
"step": 1911
},
{
"epoch": 7.326007326007326,
"grad_norm": 0.22214488685131073,
"learning_rate": 0.001,
"loss": 0.1763,
"step": 2000
},
{
"epoch": 8.0,
"eval_accuracy": 0.24185724185724186,
"eval_f1_macro": 0.587992292024695,
"eval_f1_micro": 0.7655172413793103,
"eval_loss": 0.15311872959136963,
"eval_roc_auc": 0.837740052624858,
"eval_runtime": 427.9308,
"eval_samples_per_second": 6.744,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 2184
},
{
"epoch": 9.0,
"eval_accuracy": 0.24012474012474014,
"eval_f1_macro": 0.606908576330327,
"eval_f1_micro": 0.7699542669773061,
"eval_loss": 0.14992575347423553,
"eval_roc_auc": 0.8431046707780733,
"eval_runtime": 424.0382,
"eval_samples_per_second": 6.806,
"eval_steps_per_second": 0.215,
"learning_rate": 0.001,
"step": 2457
},
{
"epoch": 9.157509157509157,
"grad_norm": 0.1733015924692154,
"learning_rate": 0.001,
"loss": 0.1735,
"step": 2500
},
{
"epoch": 10.0,
"eval_accuracy": 0.24393624393624394,
"eval_f1_macro": 0.5829080312220596,
"eval_f1_micro": 0.7606115107913669,
"eval_loss": 0.1509619951248169,
"eval_roc_auc": 0.8277441062627229,
"eval_runtime": 424.8811,
"eval_samples_per_second": 6.792,
"eval_steps_per_second": 0.214,
"learning_rate": 0.001,
"step": 2730
},
{
"epoch": 10.989010989010989,
"grad_norm": 0.16356830298900604,
"learning_rate": 0.001,
"loss": 0.1723,
"step": 3000
},
{
"epoch": 11.0,
"eval_accuracy": 0.2505197505197505,
"eval_f1_macro": 0.5976223089766404,
"eval_f1_micro": 0.7689559002963221,
"eval_loss": 0.1520717293024063,
"eval_roc_auc": 0.8399853012032679,
"eval_runtime": 434.5331,
"eval_samples_per_second": 6.642,
"eval_steps_per_second": 0.209,
"learning_rate": 0.001,
"step": 3003
},
{
"epoch": 12.0,
"eval_accuracy": 0.2442827442827443,
"eval_f1_macro": 0.607405900640871,
"eval_f1_micro": 0.7759986516096409,
"eval_loss": 0.15027731657028198,
"eval_roc_auc": 0.8526551998703694,
"eval_runtime": 434.0545,
"eval_samples_per_second": 6.649,
"eval_steps_per_second": 0.21,
"learning_rate": 0.001,
"step": 3276
},
{
"epoch": 12.820512820512821,
"grad_norm": 0.1642971783876419,
"learning_rate": 0.001,
"loss": 0.1719,
"step": 3500
},
{
"epoch": 13.0,
"eval_accuracy": 0.24393624393624394,
"eval_f1_macro": 0.6003271512523337,
"eval_f1_micro": 0.7623558852444365,
"eval_loss": 0.1504218876361847,
"eval_roc_auc": 0.8301696089299148,
"eval_runtime": 426.4716,
"eval_samples_per_second": 6.767,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 3549
},
{
"epoch": 14.0,
"eval_accuracy": 0.24462924462924462,
"eval_f1_macro": 0.602811285040826,
"eval_f1_micro": 0.7644358114073813,
"eval_loss": 0.1496724784374237,
"eval_roc_auc": 0.8342951177137805,
"eval_runtime": 428.909,
"eval_samples_per_second": 6.729,
"eval_steps_per_second": 0.212,
"learning_rate": 0.001,
"step": 3822
},
{
"epoch": 14.652014652014651,
"grad_norm": 0.1759812980890274,
"learning_rate": 0.001,
"loss": 0.1702,
"step": 4000
},
{
"epoch": 15.0,
"eval_accuracy": 0.2512127512127512,
"eval_f1_macro": 0.6066013767027806,
"eval_f1_micro": 0.7751615281210703,
"eval_loss": 0.14749661087989807,
"eval_roc_auc": 0.8445581856657356,
"eval_runtime": 424.6732,
"eval_samples_per_second": 6.796,
"eval_steps_per_second": 0.214,
"learning_rate": 0.001,
"step": 4095
},
{
"epoch": 16.0,
"eval_accuracy": 0.24636174636174638,
"eval_f1_macro": 0.5838354990739413,
"eval_f1_micro": 0.7645565108923241,
"eval_loss": 0.14998775720596313,
"eval_roc_auc": 0.8320747114163963,
"eval_runtime": 423.7704,
"eval_samples_per_second": 6.81,
"eval_steps_per_second": 0.215,
"learning_rate": 0.001,
"step": 4368
},
{
"epoch": 16.483516483516482,
"grad_norm": 0.14804692566394806,
"learning_rate": 0.001,
"loss": 0.1696,
"step": 4500
},
{
"epoch": 17.0,
"eval_accuracy": 0.24566874566874566,
"eval_f1_macro": 0.6073459016890155,
"eval_f1_micro": 0.7719883641341547,
"eval_loss": 0.15297245979309082,
"eval_roc_auc": 0.8464322218871764,
"eval_runtime": 424.9885,
"eval_samples_per_second": 6.791,
"eval_steps_per_second": 0.214,
"learning_rate": 0.001,
"step": 4641
},
{
"epoch": 18.0,
"eval_accuracy": 0.24393624393624394,
"eval_f1_macro": 0.614324753279198,
"eval_f1_micro": 0.7751951282271207,
"eval_loss": 0.14907290041446686,
"eval_roc_auc": 0.8475019020709771,
"eval_runtime": 420.1647,
"eval_samples_per_second": 6.869,
"eval_steps_per_second": 0.217,
"learning_rate": 0.001,
"step": 4914
},
{
"epoch": 18.315018315018314,
"grad_norm": 0.19223743677139282,
"learning_rate": 0.001,
"loss": 0.1717,
"step": 5000
},
{
"epoch": 19.0,
"eval_accuracy": 0.23458073458073458,
"eval_f1_macro": 0.6075499214740471,
"eval_f1_micro": 0.7739734788726388,
"eval_loss": 0.14951026439666748,
"eval_roc_auc": 0.848377592477135,
"eval_runtime": 427.9682,
"eval_samples_per_second": 6.743,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 5187
},
{
"epoch": 20.0,
"eval_accuracy": 0.24532224532224534,
"eval_f1_macro": 0.595638442008225,
"eval_f1_micro": 0.7636993911381718,
"eval_loss": 0.14873762428760529,
"eval_roc_auc": 0.8322311292560515,
"eval_runtime": 421.5059,
"eval_samples_per_second": 6.847,
"eval_steps_per_second": 0.216,
"learning_rate": 0.001,
"step": 5460
},
{
"epoch": 20.146520146520146,
"grad_norm": 0.15787707269191742,
"learning_rate": 0.001,
"loss": 0.1705,
"step": 5500
},
{
"epoch": 21.0,
"eval_accuracy": 0.24740124740124741,
"eval_f1_macro": 0.6164990545073296,
"eval_f1_micro": 0.780452718426063,
"eval_loss": 0.14705629646778107,
"eval_roc_auc": 0.8539786012990958,
"eval_runtime": 429.6596,
"eval_samples_per_second": 6.717,
"eval_steps_per_second": 0.212,
"learning_rate": 0.001,
"step": 5733
},
{
"epoch": 21.978021978021978,
"grad_norm": 0.15392103791236877,
"learning_rate": 0.001,
"loss": 0.1706,
"step": 6000
},
{
"epoch": 22.0,
"eval_accuracy": 0.24532224532224534,
"eval_f1_macro": 0.6073576225776433,
"eval_f1_micro": 0.7753641707130079,
"eval_loss": 0.1508719027042389,
"eval_roc_auc": 0.8494150259851333,
"eval_runtime": 429.7216,
"eval_samples_per_second": 6.716,
"eval_steps_per_second": 0.212,
"learning_rate": 0.001,
"step": 6006
},
{
"epoch": 23.0,
"eval_accuracy": 0.2428967428967429,
"eval_f1_macro": 0.6127152502703448,
"eval_f1_micro": 0.771920553133395,
"eval_loss": 0.15015815198421478,
"eval_roc_auc": 0.8388299205154317,
"eval_runtime": 426.6602,
"eval_samples_per_second": 6.764,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 6279
},
{
"epoch": 23.80952380952381,
"grad_norm": 0.1737624853849411,
"learning_rate": 0.001,
"loss": 0.1699,
"step": 6500
},
{
"epoch": 24.0,
"eval_accuracy": 0.24012474012474014,
"eval_f1_macro": 0.5849380548549015,
"eval_f1_micro": 0.7698941591532732,
"eval_loss": 0.14965225756168365,
"eval_roc_auc": 0.8406060899537385,
"eval_runtime": 430.4521,
"eval_samples_per_second": 6.705,
"eval_steps_per_second": 0.211,
"learning_rate": 0.001,
"step": 6552
},
{
"epoch": 25.0,
"eval_accuracy": 0.24255024255024255,
"eval_f1_macro": 0.6035289549510865,
"eval_f1_micro": 0.7761348897535668,
"eval_loss": 0.14702074229717255,
"eval_roc_auc": 0.8458632504863829,
"eval_runtime": 428.0693,
"eval_samples_per_second": 6.742,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 6825
},
{
"epoch": 25.641025641025642,
"grad_norm": 0.1737377792596817,
"learning_rate": 0.001,
"loss": 0.1694,
"step": 7000
},
{
"epoch": 26.0,
"eval_accuracy": 0.24220374220374222,
"eval_f1_macro": 0.6064603919289959,
"eval_f1_micro": 0.7751430907604253,
"eval_loss": 0.14808295667171478,
"eval_roc_auc": 0.8465518457868458,
"eval_runtime": 438.4341,
"eval_samples_per_second": 6.583,
"eval_steps_per_second": 0.208,
"learning_rate": 0.001,
"step": 7098
},
{
"epoch": 27.0,
"eval_accuracy": 0.24740124740124741,
"eval_f1_macro": 0.6135774018658996,
"eval_f1_micro": 0.7689308343302761,
"eval_loss": 0.14581289887428284,
"eval_roc_auc": 0.8357120666953542,
"eval_runtime": 426.6923,
"eval_samples_per_second": 6.764,
"eval_steps_per_second": 0.213,
"learning_rate": 0.001,
"step": 7371
},
{
"epoch": 27.47252747252747,
"grad_norm": 0.16500511765480042,
"learning_rate": 0.001,
"loss": 0.17,
"step": 7500
},
{
"epoch": 28.0,
"eval_accuracy": 0.24462924462924462,
"eval_f1_macro": 0.6077297645661711,
"eval_f1_micro": 0.7751325049960902,
"eval_loss": 0.1453842669725418,
"eval_roc_auc": 0.8440532649625113,
"eval_runtime": 431.4145,
"eval_samples_per_second": 6.69,
"eval_steps_per_second": 0.211,
"learning_rate": 0.001,
"step": 7644
},
{
"epoch": 29.0,
"eval_accuracy": 0.24566874566874566,
"eval_f1_macro": 0.6107922701154117,
"eval_f1_micro": 0.7735191637630662,
"eval_loss": 0.14941243827342987,
"eval_roc_auc": 0.849050708300112,
"eval_runtime": 434.9588,
"eval_samples_per_second": 6.635,
"eval_steps_per_second": 0.209,
"learning_rate": 0.001,
"step": 7917
},
{
"epoch": 29.304029304029303,
"grad_norm": 0.1599486619234085,
"learning_rate": 0.001,
"loss": 0.1685,
"step": 8000
},
{
"epoch": 30.0,
"eval_accuracy": 0.24982674982674982,
"eval_f1_macro": 0.5982833860845571,
"eval_f1_micro": 0.7705324709843182,
"eval_loss": 0.14549985527992249,
"eval_roc_auc": 0.8366026732011344,
"eval_runtime": 434.3329,
"eval_samples_per_second": 6.645,
"eval_steps_per_second": 0.21,
"learning_rate": 0.001,
"step": 8190
},
{
"epoch": 31.0,
"eval_accuracy": 0.2532917532917533,
"eval_f1_macro": 0.6068619458731248,
"eval_f1_micro": 0.7784728768532008,
"eval_loss": 0.14541107416152954,
"eval_roc_auc": 0.8494949988142239,
"eval_runtime": 435.6219,
"eval_samples_per_second": 6.625,
"eval_steps_per_second": 0.209,
"learning_rate": 0.001,
"step": 8463
},
{
"epoch": 31.135531135531135,
"grad_norm": 0.1950293928384781,
"learning_rate": 0.001,
"loss": 0.1687,
"step": 8500
},
{
"epoch": 32.0,
"eval_accuracy": 0.24532224532224534,
"eval_f1_macro": 0.6145316287096297,
"eval_f1_micro": 0.7746102833519939,
"eval_loss": 0.14657220244407654,
"eval_roc_auc": 0.8460955499587395,
"eval_runtime": 434.8949,
"eval_samples_per_second": 6.636,
"eval_steps_per_second": 0.209,
"learning_rate": 0.001,
"step": 8736
},
{
"epoch": 32.967032967032964,
"grad_norm": 0.18405263125896454,
"learning_rate": 0.001,
"loss": 0.1679,
"step": 9000
},
{
"epoch": 33.0,
"eval_accuracy": 0.253984753984754,
"eval_f1_macro": 0.6124691593400795,
"eval_f1_micro": 0.777031154551008,
"eval_loss": 0.14459234476089478,
"eval_roc_auc": 0.843919167617255,
"eval_runtime": 440.1591,
"eval_samples_per_second": 6.557,
"eval_steps_per_second": 0.207,
"learning_rate": 0.001,
"step": 9009
},
{
"epoch": 34.0,
"eval_accuracy": 0.24462924462924462,
"eval_f1_macro": 0.6168054796129936,
"eval_f1_micro": 0.7781283769180896,
"eval_loss": 0.1468168944120407,
"eval_roc_auc": 0.8469846407097918,
"eval_runtime": 438.6105,
"eval_samples_per_second": 6.58,
"eval_steps_per_second": 0.207,
"learning_rate": 0.001,
"step": 9282
},
{
"epoch": 34.798534798534796,
"grad_norm": 0.17146140336990356,
"learning_rate": 0.001,
"loss": 0.168,
"step": 9500
},
{
"epoch": 35.0,
"eval_accuracy": 0.2494802494802495,
"eval_f1_macro": 0.6193343400891848,
"eval_f1_micro": 0.7766880749869814,
"eval_loss": 0.14858707785606384,
"eval_roc_auc": 0.8451765062846143,
"eval_runtime": 434.5802,
"eval_samples_per_second": 6.641,
"eval_steps_per_second": 0.209,
"learning_rate": 0.001,
"step": 9555
},
{
"epoch": 36.0,
"eval_accuracy": 0.24878724878724878,
"eval_f1_macro": 0.6092667253949349,
"eval_f1_micro": 0.7718835224773468,
"eval_loss": 0.14637114107608795,
"eval_roc_auc": 0.8391158347811251,
"eval_runtime": 439.3197,
"eval_samples_per_second": 6.569,
"eval_steps_per_second": 0.207,
"learning_rate": 0.001,
"step": 9828
},
{
"epoch": 36.63003663003663,
"grad_norm": 0.16876503825187683,
"learning_rate": 0.001,
"loss": 0.169,
"step": 10000
},
{
"epoch": 37.0,
"eval_accuracy": 0.24982674982674982,
"eval_f1_macro": 0.6127183895875491,
"eval_f1_micro": 0.7733602776435442,
"eval_loss": 0.1448281705379486,
"eval_roc_auc": 0.8402195590843876,
"eval_runtime": 437.3035,
"eval_samples_per_second": 6.6,
"eval_steps_per_second": 0.208,
"learning_rate": 0.001,
"step": 10101
},
{
"epoch": 38.0,
"eval_accuracy": 0.25225225225225223,
"eval_f1_macro": 0.6109962510638844,
"eval_f1_micro": 0.7814896880859042,
"eval_loss": 0.1450735628604889,
"eval_roc_auc": 0.8526187412743501,
"eval_runtime": 437.7229,
"eval_samples_per_second": 6.593,
"eval_steps_per_second": 0.208,
"learning_rate": 0.001,
"step": 10374
},
{
"epoch": 38.46153846153846,
"grad_norm": 0.19475676119327545,
"learning_rate": 0.001,
"loss": 0.167,
"step": 10500
},
{
"epoch": 39.0,
"eval_accuracy": 0.24982674982674982,
"eval_f1_macro": 0.6272196317832909,
"eval_f1_micro": 0.7824146207942057,
"eval_loss": 0.14469724893569946,
"eval_roc_auc": 0.8563424677452759,
"eval_runtime": 435.4486,
"eval_samples_per_second": 6.628,
"eval_steps_per_second": 0.209,
"learning_rate": 0.001,
"step": 10647
},
{
"epoch": 40.0,
"eval_accuracy": 0.25363825363825365,
"eval_f1_macro": 0.6265963634718456,
"eval_f1_micro": 0.7836651178652115,
"eval_loss": 0.14824891090393066,
"eval_roc_auc": 0.853692740688437,
"eval_runtime": 435.8824,
"eval_samples_per_second": 6.621,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 10920
},
{
"epoch": 40.29304029304029,
"grad_norm": 0.15533967316150665,
"learning_rate": 0.0001,
"loss": 0.1652,
"step": 11000
},
{
"epoch": 41.0,
"eval_accuracy": 0.2616077616077616,
"eval_f1_macro": 0.6323784470247855,
"eval_f1_micro": 0.7833456473553827,
"eval_loss": 0.14141727983951569,
"eval_roc_auc": 0.8483120796798727,
"eval_runtime": 435.7344,
"eval_samples_per_second": 6.623,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 11193
},
{
"epoch": 42.0,
"eval_accuracy": 0.26195426195426197,
"eval_f1_macro": 0.6371841233046203,
"eval_f1_micro": 0.7884351407000686,
"eval_loss": 0.13979895412921906,
"eval_roc_auc": 0.8545567611245666,
"eval_runtime": 438.4508,
"eval_samples_per_second": 6.582,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 11466
},
{
"epoch": 42.124542124542124,
"grad_norm": 0.1733330935239792,
"learning_rate": 0.0001,
"loss": 0.1608,
"step": 11500
},
{
"epoch": 43.0,
"eval_accuracy": 0.26403326403326405,
"eval_f1_macro": 0.6366820358518588,
"eval_f1_micro": 0.7871061893724783,
"eval_loss": 0.14107641577720642,
"eval_roc_auc": 0.853678548931782,
"eval_runtime": 434.1211,
"eval_samples_per_second": 6.648,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 11739
},
{
"epoch": 43.956043956043956,
"grad_norm": 0.19694675505161285,
"learning_rate": 0.0001,
"loss": 0.1596,
"step": 12000
},
{
"epoch": 44.0,
"eval_accuracy": 0.26126126126126126,
"eval_f1_macro": 0.6256922069455233,
"eval_f1_micro": 0.787878787878788,
"eval_loss": 0.13898694515228271,
"eval_roc_auc": 0.8537086091649239,
"eval_runtime": 434.0073,
"eval_samples_per_second": 6.65,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 12012
},
{
"epoch": 45.0,
"eval_accuracy": 0.2664587664587665,
"eval_f1_macro": 0.6421056073559387,
"eval_f1_micro": 0.7894011202068074,
"eval_loss": 0.13859130442142487,
"eval_roc_auc": 0.8538817942028954,
"eval_runtime": 432.4865,
"eval_samples_per_second": 6.673,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 12285
},
{
"epoch": 45.78754578754579,
"grad_norm": 0.18810147047042847,
"learning_rate": 0.0001,
"loss": 0.1582,
"step": 12500
},
{
"epoch": 46.0,
"eval_accuracy": 0.2664587664587665,
"eval_f1_macro": 0.6283048537279357,
"eval_f1_micro": 0.7873893327575039,
"eval_loss": 0.139601469039917,
"eval_roc_auc": 0.8521625527563127,
"eval_runtime": 421.9429,
"eval_samples_per_second": 6.84,
"eval_steps_per_second": 0.216,
"learning_rate": 0.0001,
"step": 12558
},
{
"epoch": 47.0,
"eval_accuracy": 0.2636867636867637,
"eval_f1_macro": 0.6286555138094179,
"eval_f1_micro": 0.7863567238757333,
"eval_loss": 0.13869330286979675,
"eval_roc_auc": 0.8499808451526433,
"eval_runtime": 424.0306,
"eval_samples_per_second": 6.806,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 12831
},
{
"epoch": 47.61904761904762,
"grad_norm": 0.15351006388664246,
"learning_rate": 0.0001,
"loss": 0.1584,
"step": 13000
},
{
"epoch": 48.0,
"eval_accuracy": 0.26784476784476785,
"eval_f1_macro": 0.6334934953582803,
"eval_f1_micro": 0.7913177234660741,
"eval_loss": 0.13777127861976624,
"eval_roc_auc": 0.8571892112602602,
"eval_runtime": 419.9652,
"eval_samples_per_second": 6.872,
"eval_steps_per_second": 0.217,
"learning_rate": 0.0001,
"step": 13104
},
{
"epoch": 49.0,
"eval_accuracy": 0.26403326403326405,
"eval_f1_macro": 0.6381777921693204,
"eval_f1_micro": 0.7933989479042932,
"eval_loss": 0.1377096027135849,
"eval_roc_auc": 0.8602965218660363,
"eval_runtime": 431.2306,
"eval_samples_per_second": 6.692,
"eval_steps_per_second": 0.211,
"learning_rate": 0.0001,
"step": 13377
},
{
"epoch": 49.45054945054945,
"grad_norm": 0.1798904836177826,
"learning_rate": 0.0001,
"loss": 0.157,
"step": 13500
},
{
"epoch": 50.0,
"eval_accuracy": 0.2674982674982675,
"eval_f1_macro": 0.6362718007605523,
"eval_f1_micro": 0.7918342891380639,
"eval_loss": 0.13755330443382263,
"eval_roc_auc": 0.8570210161405075,
"eval_runtime": 429.5809,
"eval_samples_per_second": 6.718,
"eval_steps_per_second": 0.212,
"learning_rate": 0.0001,
"step": 13650
},
{
"epoch": 51.0,
"eval_accuracy": 0.2661122661122661,
"eval_f1_macro": 0.6426825970872383,
"eval_f1_micro": 0.7928808087673094,
"eval_loss": 0.13754987716674805,
"eval_roc_auc": 0.8596608706709776,
"eval_runtime": 429.3766,
"eval_samples_per_second": 6.721,
"eval_steps_per_second": 0.212,
"learning_rate": 0.0001,
"step": 13923
},
{
"epoch": 51.282051282051285,
"grad_norm": 0.20376506447792053,
"learning_rate": 0.0001,
"loss": 0.1567,
"step": 14000
},
{
"epoch": 52.0,
"eval_accuracy": 0.26576576576576577,
"eval_f1_macro": 0.6367912909960436,
"eval_f1_micro": 0.7871186146434616,
"eval_loss": 0.13771678507328033,
"eval_roc_auc": 0.8506886757830149,
"eval_runtime": 424.3804,
"eval_samples_per_second": 6.801,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 14196
},
{
"epoch": 53.0,
"eval_accuracy": 0.2692307692307692,
"eval_f1_macro": 0.640555047060403,
"eval_f1_micro": 0.7928592630284527,
"eval_loss": 0.13740690052509308,
"eval_roc_auc": 0.8601326459765699,
"eval_runtime": 434.4832,
"eval_samples_per_second": 6.642,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 14469
},
{
"epoch": 53.11355311355312,
"grad_norm": 0.16348811984062195,
"learning_rate": 0.0001,
"loss": 0.1571,
"step": 14500
},
{
"epoch": 54.0,
"eval_accuracy": 0.27165627165627165,
"eval_f1_macro": 0.6412320555565514,
"eval_f1_micro": 0.7920979171140219,
"eval_loss": 0.1368684023618698,
"eval_roc_auc": 0.8562094300869534,
"eval_runtime": 425.2932,
"eval_samples_per_second": 6.786,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 14742
},
{
"epoch": 54.94505494505494,
"grad_norm": 0.20431332290172577,
"learning_rate": 0.0001,
"loss": 0.1548,
"step": 15000
},
{
"epoch": 55.0,
"eval_accuracy": 0.2702702702702703,
"eval_f1_macro": 0.6377616721633446,
"eval_f1_micro": 0.7914089347079037,
"eval_loss": 0.13703426718711853,
"eval_roc_auc": 0.8557803910164303,
"eval_runtime": 424.9893,
"eval_samples_per_second": 6.791,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 15015
},
{
"epoch": 56.0,
"eval_accuracy": 0.2643797643797644,
"eval_f1_macro": 0.6425003998141597,
"eval_f1_micro": 0.7931107623128156,
"eval_loss": 0.1364637017250061,
"eval_roc_auc": 0.8601515459625123,
"eval_runtime": 423.7139,
"eval_samples_per_second": 6.811,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 15288
},
{
"epoch": 56.776556776556774,
"grad_norm": 0.19714656472206116,
"learning_rate": 0.0001,
"loss": 0.155,
"step": 15500
},
{
"epoch": 57.0,
"eval_accuracy": 0.2674982674982675,
"eval_f1_macro": 0.6381793578718891,
"eval_f1_micro": 0.7926408585665006,
"eval_loss": 0.13675515353679657,
"eval_roc_auc": 0.8588114846455387,
"eval_runtime": 426.4919,
"eval_samples_per_second": 6.767,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 15561
},
{
"epoch": 58.0,
"eval_accuracy": 0.2674982674982675,
"eval_f1_macro": 0.637380953089336,
"eval_f1_micro": 0.791562634524322,
"eval_loss": 0.1364695280790329,
"eval_roc_auc": 0.855274853280308,
"eval_runtime": 425.8426,
"eval_samples_per_second": 6.777,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 15834
},
{
"epoch": 58.608058608058606,
"grad_norm": 0.19042669236660004,
"learning_rate": 0.0001,
"loss": 0.155,
"step": 16000
},
{
"epoch": 59.0,
"eval_accuracy": 0.2674982674982675,
"eval_f1_macro": 0.6428884521567982,
"eval_f1_micro": 0.7922245108135942,
"eval_loss": 0.13641765713691711,
"eval_roc_auc": 0.8565012329926954,
"eval_runtime": 423.8693,
"eval_samples_per_second": 6.809,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 16107
},
{
"epoch": 60.0,
"eval_accuracy": 0.26507276507276506,
"eval_f1_macro": 0.6357999016219877,
"eval_f1_micro": 0.7882888744307093,
"eval_loss": 0.13687649369239807,
"eval_roc_auc": 0.8514745744887481,
"eval_runtime": 423.4928,
"eval_samples_per_second": 6.815,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 16380
},
{
"epoch": 60.43956043956044,
"grad_norm": 0.18568764626979828,
"learning_rate": 0.0001,
"loss": 0.1546,
"step": 16500
},
{
"epoch": 61.0,
"eval_accuracy": 0.2713097713097713,
"eval_f1_macro": 0.6503848519713329,
"eval_f1_micro": 0.7945638702508654,
"eval_loss": 0.13638463616371155,
"eval_roc_auc": 0.8588833823919201,
"eval_runtime": 425.9119,
"eval_samples_per_second": 6.776,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 16653
},
{
"epoch": 62.0,
"eval_accuracy": 0.2751212751212751,
"eval_f1_macro": 0.6441767594174573,
"eval_f1_micro": 0.7931640039405492,
"eval_loss": 0.13563227653503418,
"eval_roc_auc": 0.8575138778747027,
"eval_runtime": 422.0661,
"eval_samples_per_second": 6.838,
"eval_steps_per_second": 0.216,
"learning_rate": 0.0001,
"step": 16926
},
{
"epoch": 62.27106227106227,
"grad_norm": 0.19402863085269928,
"learning_rate": 0.0001,
"loss": 0.1536,
"step": 17000
},
{
"epoch": 63.0,
"eval_accuracy": 0.27373527373527373,
"eval_f1_macro": 0.6515952055035917,
"eval_f1_micro": 0.7966116124638174,
"eval_loss": 0.1355270892381668,
"eval_roc_auc": 0.8610939161629354,
"eval_runtime": 426.9279,
"eval_samples_per_second": 6.76,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 17199
},
{
"epoch": 64.0,
"eval_accuracy": 0.26784476784476785,
"eval_f1_macro": 0.6450040026439422,
"eval_f1_micro": 0.7934075342465754,
"eval_loss": 0.13592010736465454,
"eval_roc_auc": 0.8577985580745997,
"eval_runtime": 426.0816,
"eval_samples_per_second": 6.773,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 17472
},
{
"epoch": 64.1025641025641,
"grad_norm": 0.22000150382518768,
"learning_rate": 0.0001,
"loss": 0.1544,
"step": 17500
},
{
"epoch": 65.0,
"eval_accuracy": 0.27061677061677064,
"eval_f1_macro": 0.64551501310817,
"eval_f1_micro": 0.7936467053015668,
"eval_loss": 0.13569533824920654,
"eval_roc_auc": 0.857159821715051,
"eval_runtime": 424.6551,
"eval_samples_per_second": 6.796,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 17745
},
{
"epoch": 65.93406593406593,
"grad_norm": 0.19799016416072845,
"learning_rate": 0.0001,
"loss": 0.1529,
"step": 18000
},
{
"epoch": 66.0,
"eval_accuracy": 0.2713097713097713,
"eval_f1_macro": 0.6477176853690674,
"eval_f1_micro": 0.794643237940888,
"eval_loss": 0.13565082848072052,
"eval_roc_auc": 0.8594942449609874,
"eval_runtime": 425.0795,
"eval_samples_per_second": 6.789,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 18018
},
{
"epoch": 67.0,
"eval_accuracy": 0.27546777546777546,
"eval_f1_macro": 0.6544361257862924,
"eval_f1_micro": 0.7965922095536813,
"eval_loss": 0.13533934950828552,
"eval_roc_auc": 0.8622831129363361,
"eval_runtime": 424.6762,
"eval_samples_per_second": 6.796,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 18291
},
{
"epoch": 67.76556776556777,
"grad_norm": 0.2619948983192444,
"learning_rate": 0.0001,
"loss": 0.1528,
"step": 18500
},
{
"epoch": 68.0,
"eval_accuracy": 0.2733887733887734,
"eval_f1_macro": 0.6519486064773884,
"eval_f1_micro": 0.7955772910907932,
"eval_loss": 0.1353396475315094,
"eval_roc_auc": 0.8608058154545816,
"eval_runtime": 421.8067,
"eval_samples_per_second": 6.842,
"eval_steps_per_second": 0.216,
"learning_rate": 0.0001,
"step": 18564
},
{
"epoch": 69.0,
"eval_accuracy": 0.26992376992376993,
"eval_f1_macro": 0.6515714856354324,
"eval_f1_micro": 0.7966188524590164,
"eval_loss": 0.13474246859550476,
"eval_roc_auc": 0.8602900698481241,
"eval_runtime": 423.2901,
"eval_samples_per_second": 6.818,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 18837
},
{
"epoch": 69.59706959706959,
"grad_norm": 0.18048201501369476,
"learning_rate": 0.0001,
"loss": 0.1528,
"step": 19000
},
{
"epoch": 70.0,
"eval_accuracy": 0.272002772002772,
"eval_f1_macro": 0.6441608871918139,
"eval_f1_micro": 0.7944687795241776,
"eval_loss": 0.13504748046398163,
"eval_roc_auc": 0.8574953132327267,
"eval_runtime": 423.3844,
"eval_samples_per_second": 6.817,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 19110
},
{
"epoch": 71.0,
"eval_accuracy": 0.27234927234927236,
"eval_f1_macro": 0.6441889860402124,
"eval_f1_micro": 0.7933057280883367,
"eval_loss": 0.13502468168735504,
"eval_roc_auc": 0.8556664277229126,
"eval_runtime": 422.6912,
"eval_samples_per_second": 6.828,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 19383
},
{
"epoch": 71.42857142857143,
"grad_norm": 0.24162879586219788,
"learning_rate": 0.0001,
"loss": 0.1522,
"step": 19500
},
{
"epoch": 72.0,
"eval_accuracy": 0.2758142758142758,
"eval_f1_macro": 0.6484748365424647,
"eval_f1_micro": 0.7969950486597234,
"eval_loss": 0.1344645917415619,
"eval_roc_auc": 0.8605409876174911,
"eval_runtime": 426.5755,
"eval_samples_per_second": 6.766,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 19656
},
{
"epoch": 73.0,
"eval_accuracy": 0.27616077616077617,
"eval_f1_macro": 0.6518769914193778,
"eval_f1_micro": 0.7977006599957419,
"eval_loss": 0.1341526359319687,
"eval_roc_auc": 0.8616010233088203,
"eval_runtime": 420.7226,
"eval_samples_per_second": 6.86,
"eval_steps_per_second": 0.216,
"learning_rate": 0.0001,
"step": 19929
},
{
"epoch": 73.26007326007326,
"grad_norm": 0.22451983392238617,
"learning_rate": 0.0001,
"loss": 0.1523,
"step": 20000
},
{
"epoch": 74.0,
"eval_accuracy": 0.2751212751212751,
"eval_f1_macro": 0.641334935505441,
"eval_f1_micro": 0.7914797229603171,
"eval_loss": 0.13499116897583008,
"eval_roc_auc": 0.8520198169504839,
"eval_runtime": 428.7922,
"eval_samples_per_second": 6.731,
"eval_steps_per_second": 0.212,
"learning_rate": 0.0001,
"step": 20202
},
{
"epoch": 75.0,
"eval_accuracy": 0.2751212751212751,
"eval_f1_macro": 0.6485229770180625,
"eval_f1_micro": 0.7946678133734681,
"eval_loss": 0.13461369276046753,
"eval_roc_auc": 0.8572354216588205,
"eval_runtime": 427.8784,
"eval_samples_per_second": 6.745,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 20475
},
{
"epoch": 75.0915750915751,
"grad_norm": 0.22029711306095123,
"learning_rate": 0.0001,
"loss": 0.1521,
"step": 20500
},
{
"epoch": 76.0,
"eval_accuracy": 0.2758142758142758,
"eval_f1_macro": 0.6478195810395848,
"eval_f1_micro": 0.7964594201659113,
"eval_loss": 0.13438266515731812,
"eval_roc_auc": 0.8597526207801657,
"eval_runtime": 424.3142,
"eval_samples_per_second": 6.802,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 20748
},
{
"epoch": 76.92307692307692,
"grad_norm": 0.2415299415588379,
"learning_rate": 0.0001,
"loss": 0.1515,
"step": 21000
},
{
"epoch": 77.0,
"eval_accuracy": 0.27754677754677753,
"eval_f1_macro": 0.6536737916153181,
"eval_f1_micro": 0.7977742853502102,
"eval_loss": 0.13460540771484375,
"eval_roc_auc": 0.8623314561225224,
"eval_runtime": 422.8083,
"eval_samples_per_second": 6.826,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 21021
},
{
"epoch": 78.0,
"eval_accuracy": 0.27754677754677753,
"eval_f1_macro": 0.6543115985953537,
"eval_f1_micro": 0.7978169818504888,
"eval_loss": 0.13411369919776917,
"eval_roc_auc": 0.8634738791194995,
"eval_runtime": 428.5067,
"eval_samples_per_second": 6.735,
"eval_steps_per_second": 0.212,
"learning_rate": 0.0001,
"step": 21294
},
{
"epoch": 78.75457875457876,
"grad_norm": 0.2636328637599945,
"learning_rate": 0.0001,
"loss": 0.1514,
"step": 21500
},
{
"epoch": 79.0,
"eval_accuracy": 0.2740817740817741,
"eval_f1_macro": 0.6523004018612216,
"eval_f1_micro": 0.7953020134228188,
"eval_loss": 0.13399606943130493,
"eval_roc_auc": 0.8574454542918126,
"eval_runtime": 436.7976,
"eval_samples_per_second": 6.607,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 21567
},
{
"epoch": 80.0,
"eval_accuracy": 0.27823977823977825,
"eval_f1_macro": 0.6545582038870168,
"eval_f1_micro": 0.7993085420355848,
"eval_loss": 0.1344238668680191,
"eval_roc_auc": 0.8652547567870936,
"eval_runtime": 431.9941,
"eval_samples_per_second": 6.681,
"eval_steps_per_second": 0.211,
"learning_rate": 0.0001,
"step": 21840
},
{
"epoch": 80.58608058608058,
"grad_norm": 0.23601791262626648,
"learning_rate": 0.0001,
"loss": 0.1516,
"step": 22000
},
{
"epoch": 81.0,
"eval_accuracy": 0.2758142758142758,
"eval_f1_macro": 0.6559691700651434,
"eval_f1_micro": 0.7966715529878418,
"eval_loss": 0.13405664265155792,
"eval_roc_auc": 0.8575861109650502,
"eval_runtime": 436.6356,
"eval_samples_per_second": 6.61,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 22113
},
{
"epoch": 82.0,
"eval_accuracy": 0.2765072765072765,
"eval_f1_macro": 0.6453669674995801,
"eval_f1_micro": 0.7947541551246537,
"eval_loss": 0.13407430052757263,
"eval_roc_auc": 0.8554945304057716,
"eval_runtime": 436.5794,
"eval_samples_per_second": 6.61,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 22386
},
{
"epoch": 82.41758241758242,
"grad_norm": 0.19588124752044678,
"learning_rate": 0.0001,
"loss": 0.149,
"step": 22500
},
{
"epoch": 83.0,
"eval_accuracy": 0.2702702702702703,
"eval_f1_macro": 0.645966570658811,
"eval_f1_micro": 0.7924365020985678,
"eval_loss": 0.1350804716348648,
"eval_roc_auc": 0.8543412288505268,
"eval_runtime": 433.6987,
"eval_samples_per_second": 6.654,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 22659
},
{
"epoch": 84.0,
"eval_accuracy": 0.27546777546777546,
"eval_f1_macro": 0.6512285101875886,
"eval_f1_micro": 0.7957293542577825,
"eval_loss": 0.13387472927570343,
"eval_roc_auc": 0.8585996545688873,
"eval_runtime": 432.4386,
"eval_samples_per_second": 6.674,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 22932
},
{
"epoch": 84.24908424908425,
"grad_norm": 0.2560372054576874,
"learning_rate": 0.0001,
"loss": 0.1515,
"step": 23000
},
{
"epoch": 85.0,
"eval_accuracy": 0.27927927927927926,
"eval_f1_macro": 0.6531817491521362,
"eval_f1_micro": 0.7990622335890879,
"eval_loss": 0.13341927528381348,
"eval_roc_auc": 0.8620406055936447,
"eval_runtime": 432.3488,
"eval_samples_per_second": 6.675,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 23205
},
{
"epoch": 86.0,
"eval_accuracy": 0.2747747747747748,
"eval_f1_macro": 0.6595866427349153,
"eval_f1_micro": 0.7988261313371896,
"eval_loss": 0.13337253034114838,
"eval_roc_auc": 0.8625331319838734,
"eval_runtime": 435.2436,
"eval_samples_per_second": 6.631,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 23478
},
{
"epoch": 86.08058608058609,
"grad_norm": 0.28640052676200867,
"learning_rate": 0.0001,
"loss": 0.1495,
"step": 23500
},
{
"epoch": 87.0,
"eval_accuracy": 0.27442827442827444,
"eval_f1_macro": 0.6467323251879672,
"eval_f1_micro": 0.7956179390619651,
"eval_loss": 0.1339845359325409,
"eval_roc_auc": 0.8590850582532711,
"eval_runtime": 438.7375,
"eval_samples_per_second": 6.578,
"eval_steps_per_second": 0.207,
"learning_rate": 0.0001,
"step": 23751
},
{
"epoch": 87.91208791208791,
"grad_norm": 0.23546907305717468,
"learning_rate": 0.0001,
"loss": 0.1496,
"step": 24000
},
{
"epoch": 88.0,
"eval_accuracy": 0.2747747747747748,
"eval_f1_macro": 0.648318545746826,
"eval_f1_micro": 0.7981612326551459,
"eval_loss": 0.13357459008693695,
"eval_roc_auc": 0.8619578829440303,
"eval_runtime": 432.3449,
"eval_samples_per_second": 6.675,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 24024
},
{
"epoch": 89.0,
"eval_accuracy": 0.2806652806652807,
"eval_f1_macro": 0.6585340844298272,
"eval_f1_micro": 0.8014968675104065,
"eval_loss": 0.13366733491420746,
"eval_roc_auc": 0.8672320387088881,
"eval_runtime": 431.6296,
"eval_samples_per_second": 6.686,
"eval_steps_per_second": 0.211,
"learning_rate": 0.0001,
"step": 24297
},
{
"epoch": 89.74358974358974,
"grad_norm": 0.24246211349964142,
"learning_rate": 0.0001,
"loss": 0.1493,
"step": 24500
},
{
"epoch": 90.0,
"eval_accuracy": 0.2772002772002772,
"eval_f1_macro": 0.66211749340029,
"eval_f1_micro": 0.8010798042854732,
"eval_loss": 0.1332736760377884,
"eval_roc_auc": 0.8661044781564988,
"eval_runtime": 425.5723,
"eval_samples_per_second": 6.781,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 24570
},
{
"epoch": 91.0,
"eval_accuracy": 0.27823977823977825,
"eval_f1_macro": 0.6528573832362276,
"eval_f1_micro": 0.7956933454403943,
"eval_loss": 0.13367226719856262,
"eval_roc_auc": 0.8562680347985093,
"eval_runtime": 443.8961,
"eval_samples_per_second": 6.502,
"eval_steps_per_second": 0.205,
"learning_rate": 0.0001,
"step": 24843
},
{
"epoch": 91.57509157509158,
"grad_norm": 0.22026851773262024,
"learning_rate": 0.0001,
"loss": 0.1496,
"step": 25000
},
{
"epoch": 92.0,
"eval_accuracy": 0.27546777546777546,
"eval_f1_macro": 0.6513649424471982,
"eval_f1_micro": 0.796086375587259,
"eval_loss": 0.13348612189292908,
"eval_roc_auc": 0.8573559442803198,
"eval_runtime": 443.9031,
"eval_samples_per_second": 6.501,
"eval_steps_per_second": 0.205,
"learning_rate": 0.0001,
"step": 25116
},
{
"epoch": 93.0,
"eval_accuracy": 0.2758142758142758,
"eval_f1_macro": 0.6559763883082907,
"eval_f1_micro": 0.8001861094662043,
"eval_loss": 0.1330718696117401,
"eval_roc_auc": 0.8648260530605368,
"eval_runtime": 436.5725,
"eval_samples_per_second": 6.611,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 25389
},
{
"epoch": 93.4065934065934,
"grad_norm": 0.28630152344703674,
"learning_rate": 0.0001,
"loss": 0.1493,
"step": 25500
},
{
"epoch": 94.0,
"eval_accuracy": 0.2758142758142758,
"eval_f1_macro": 0.6553585917255438,
"eval_f1_micro": 0.7995090362720617,
"eval_loss": 0.13329002261161804,
"eval_roc_auc": 0.864277443745379,
"eval_runtime": 442.8808,
"eval_samples_per_second": 6.516,
"eval_steps_per_second": 0.205,
"learning_rate": 0.0001,
"step": 25662
},
{
"epoch": 95.0,
"eval_accuracy": 0.2758142758142758,
"eval_f1_macro": 0.6579543710907207,
"eval_f1_micro": 0.7979651162790697,
"eval_loss": 0.13314621150493622,
"eval_roc_auc": 0.8606367216129991,
"eval_runtime": 436.3942,
"eval_samples_per_second": 6.613,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 25935
},
{
"epoch": 95.23809523809524,
"grad_norm": 0.25194719433784485,
"learning_rate": 0.0001,
"loss": 0.1482,
"step": 26000
},
{
"epoch": 96.0,
"eval_accuracy": 0.2751212751212751,
"eval_f1_macro": 0.6556445954379041,
"eval_f1_micro": 0.7992523999660183,
"eval_loss": 0.13279949128627777,
"eval_roc_auc": 0.8631226264354063,
"eval_runtime": 426.8086,
"eval_samples_per_second": 6.762,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 26208
},
{
"epoch": 97.0,
"eval_accuracy": 0.27823977823977825,
"eval_f1_macro": 0.6492741904723621,
"eval_f1_micro": 0.7977296181630549,
"eval_loss": 0.1332886964082718,
"eval_roc_auc": 0.8588905587527994,
"eval_runtime": 441.9848,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 0.206,
"learning_rate": 0.0001,
"step": 26481
},
{
"epoch": 97.06959706959707,
"grad_norm": 0.27280953526496887,
"learning_rate": 0.0001,
"loss": 0.1497,
"step": 26500
},
{
"epoch": 98.0,
"eval_accuracy": 0.27546777546777546,
"eval_f1_macro": 0.6600105762308898,
"eval_f1_micro": 0.799611141637432,
"eval_loss": 0.13266970217227936,
"eval_roc_auc": 0.864715456620441,
"eval_runtime": 439.781,
"eval_samples_per_second": 6.562,
"eval_steps_per_second": 0.207,
"learning_rate": 0.0001,
"step": 26754
},
{
"epoch": 98.9010989010989,
"grad_norm": 0.30599892139434814,
"learning_rate": 0.0001,
"loss": 0.1489,
"step": 27000
},
{
"epoch": 99.0,
"eval_accuracy": 0.27165627165627165,
"eval_f1_macro": 0.6589970862385839,
"eval_f1_micro": 0.7978809757764771,
"eval_loss": 0.13253149390220642,
"eval_roc_auc": 0.8607699202364255,
"eval_runtime": 438.5456,
"eval_samples_per_second": 6.581,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 27027
},
{
"epoch": 100.0,
"eval_accuracy": 0.27616077616077617,
"eval_f1_macro": 0.6570195655430786,
"eval_f1_micro": 0.797143840330351,
"eval_loss": 0.1329408884048462,
"eval_roc_auc": 0.8584810367011169,
"eval_runtime": 434.9771,
"eval_samples_per_second": 6.635,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 27300
},
{
"epoch": 100.73260073260073,
"grad_norm": 0.2732805013656616,
"learning_rate": 0.0001,
"loss": 0.1482,
"step": 27500
},
{
"epoch": 101.0,
"eval_accuracy": 0.28205128205128205,
"eval_f1_macro": 0.657951499975745,
"eval_f1_micro": 0.7991615690636095,
"eval_loss": 0.13274870812892914,
"eval_roc_auc": 0.861103560655407,
"eval_runtime": 435.4493,
"eval_samples_per_second": 6.628,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 27573
},
{
"epoch": 102.0,
"eval_accuracy": 0.2817047817047817,
"eval_f1_macro": 0.654306822863844,
"eval_f1_micro": 0.7986821274228745,
"eval_loss": 0.1326293796300888,
"eval_roc_auc": 0.8607733407448822,
"eval_runtime": 437.9645,
"eval_samples_per_second": 6.59,
"eval_steps_per_second": 0.208,
"learning_rate": 0.0001,
"step": 27846
},
{
"epoch": 102.56410256410257,
"grad_norm": 0.23533137142658234,
"learning_rate": 0.0001,
"loss": 0.1474,
"step": 28000
},
{
"epoch": 103.0,
"eval_accuracy": 0.2803187803187803,
"eval_f1_macro": 0.6518495856500403,
"eval_f1_micro": 0.7993688968487486,
"eval_loss": 0.13247379660606384,
"eval_roc_auc": 0.8620991566501659,
"eval_runtime": 426.0566,
"eval_samples_per_second": 6.774,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 28119
},
{
"epoch": 104.0,
"eval_accuracy": 0.27754677754677753,
"eval_f1_macro": 0.6612536009112525,
"eval_f1_micro": 0.8010850676047981,
"eval_loss": 0.13315415382385254,
"eval_roc_auc": 0.864729420343199,
"eval_runtime": 425.2679,
"eval_samples_per_second": 6.786,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 28392
},
{
"epoch": 104.3956043956044,
"grad_norm": 0.2809629738330841,
"learning_rate": 0.0001,
"loss": 0.1472,
"step": 28500
},
{
"epoch": 105.0,
"eval_accuracy": 0.2830907830907831,
"eval_f1_macro": 0.6635718544409769,
"eval_f1_micro": 0.8012698412698412,
"eval_loss": 0.13218620419502258,
"eval_roc_auc": 0.8652135899617869,
"eval_runtime": 425.1586,
"eval_samples_per_second": 6.788,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 28665
},
{
"epoch": 106.0,
"eval_accuracy": 0.2830907830907831,
"eval_f1_macro": 0.6588128942023547,
"eval_f1_micro": 0.800988243312319,
"eval_loss": 0.13239973783493042,
"eval_roc_auc": 0.8632750603887415,
"eval_runtime": 427.5404,
"eval_samples_per_second": 6.75,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 28938
},
{
"epoch": 106.22710622710623,
"grad_norm": 0.2568123936653137,
"learning_rate": 0.0001,
"loss": 0.148,
"step": 29000
},
{
"epoch": 107.0,
"eval_accuracy": 0.2785862785862786,
"eval_f1_macro": 0.650564106362156,
"eval_f1_micro": 0.7985513421389007,
"eval_loss": 0.13358280062675476,
"eval_roc_auc": 0.8618832353771251,
"eval_runtime": 425.2874,
"eval_samples_per_second": 6.786,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 29211
},
{
"epoch": 108.0,
"eval_accuracy": 0.2796257796257796,
"eval_f1_macro": 0.6501303094783896,
"eval_f1_micro": 0.7995554225623049,
"eval_loss": 0.13270235061645508,
"eval_roc_auc": 0.8615071940670409,
"eval_runtime": 432.9179,
"eval_samples_per_second": 6.666,
"eval_steps_per_second": 0.21,
"learning_rate": 0.0001,
"step": 29484
},
{
"epoch": 108.05860805860806,
"grad_norm": 0.29480934143066406,
"learning_rate": 0.0001,
"loss": 0.1477,
"step": 29500
},
{
"epoch": 109.0,
"eval_accuracy": 0.2806652806652807,
"eval_f1_macro": 0.6579556871315007,
"eval_f1_micro": 0.8000342553738118,
"eval_loss": 0.1318453699350357,
"eval_roc_auc": 0.8612993478767093,
"eval_runtime": 434.6895,
"eval_samples_per_second": 6.639,
"eval_steps_per_second": 0.209,
"learning_rate": 0.0001,
"step": 29757
},
{
"epoch": 109.89010989010988,
"grad_norm": 0.3718918561935425,
"learning_rate": 0.0001,
"loss": 0.1479,
"step": 30000
},
{
"epoch": 110.0,
"eval_accuracy": 0.2803187803187803,
"eval_f1_macro": 0.6582487839550253,
"eval_f1_micro": 0.7997274043785672,
"eval_loss": 0.13255637884140015,
"eval_roc_auc": 0.8626158546334878,
"eval_runtime": 427.7015,
"eval_samples_per_second": 6.748,
"eval_steps_per_second": 0.213,
"learning_rate": 0.0001,
"step": 30030
},
{
"epoch": 111.0,
"eval_accuracy": 0.2785862785862786,
"eval_f1_macro": 0.6608614747058748,
"eval_f1_micro": 0.8012935069355799,
"eval_loss": 0.1319260448217392,
"eval_roc_auc": 0.8637521073014844,
"eval_runtime": 422.4227,
"eval_samples_per_second": 6.832,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 30303
},
{
"epoch": 111.72161172161172,
"grad_norm": 0.3544025719165802,
"learning_rate": 0.0001,
"loss": 0.1466,
"step": 30500
},
{
"epoch": 112.0,
"eval_accuracy": 0.28101178101178104,
"eval_f1_macro": 0.6595016342799644,
"eval_f1_micro": 0.8019278738426415,
"eval_loss": 0.13223350048065186,
"eval_roc_auc": 0.8659084092462648,
"eval_runtime": 420.8235,
"eval_samples_per_second": 6.858,
"eval_steps_per_second": 0.216,
"learning_rate": 0.0001,
"step": 30576
},
{
"epoch": 113.0,
"eval_accuracy": 0.27997227997227997,
"eval_f1_macro": 0.6592029124671744,
"eval_f1_micro": 0.8024988392216453,
"eval_loss": 0.13213913142681122,
"eval_roc_auc": 0.8666766420318518,
"eval_runtime": 423.8949,
"eval_samples_per_second": 6.808,
"eval_steps_per_second": 0.215,
"learning_rate": 0.0001,
"step": 30849
},
{
"epoch": 113.55311355311355,
"grad_norm": 0.35069116950035095,
"learning_rate": 0.0001,
"loss": 0.1474,
"step": 31000
},
{
"epoch": 114.0,
"eval_accuracy": 0.2823977823977824,
"eval_f1_macro": 0.663088095209859,
"eval_f1_micro": 0.8025030654094965,
"eval_loss": 0.13204564154148102,
"eval_roc_auc": 0.8661983610533127,
"eval_runtime": 421.2287,
"eval_samples_per_second": 6.851,
"eval_steps_per_second": 0.216,
"learning_rate": 0.0001,
"step": 31122
},
{
"epoch": 115.0,
"eval_accuracy": 0.28378378378378377,
"eval_f1_macro": 0.659797224924612,
"eval_f1_micro": 0.8004266211604096,
"eval_loss": 0.1319342404603958,
"eval_roc_auc": 0.8625399730007867,
"eval_runtime": 424.6871,
"eval_samples_per_second": 6.796,
"eval_steps_per_second": 0.214,
"learning_rate": 0.0001,
"step": 31395
},
{
"epoch": 115.38461538461539,
"grad_norm": 0.29624369740486145,
"learning_rate": 1e-05,
"loss": 0.1468,
"step": 31500
},
{
"epoch": 116.0,
"eval_accuracy": 0.2844767844767845,
"eval_f1_macro": 0.6627361818946377,
"eval_f1_micro": 0.8022295974810655,
"eval_loss": 0.13186337053775787,
"eval_roc_auc": 0.8642598314802673,
"eval_runtime": 423.8673,
"eval_samples_per_second": 6.809,
"eval_steps_per_second": 0.215,
"learning_rate": 1e-05,
"step": 31668
},
{
"epoch": 117.0,
"eval_accuracy": 0.28205128205128205,
"eval_f1_macro": 0.6604165936303265,
"eval_f1_micro": 0.8012607547491268,
"eval_loss": 0.1317850947380066,
"eval_roc_auc": 0.8634466760169507,
"eval_runtime": 419.012,
"eval_samples_per_second": 6.888,
"eval_steps_per_second": 0.217,
"learning_rate": 1e-05,
"step": 31941
},
{
"epoch": 117.21611721611721,
"grad_norm": 0.28633400797843933,
"learning_rate": 1e-05,
"loss": 0.1455,
"step": 32000
},
{
"epoch": 118.0,
"eval_accuracy": 0.2796257796257796,
"eval_f1_macro": 0.6590147410119703,
"eval_f1_micro": 0.8002395926924228,
"eval_loss": 0.13159342110157013,
"eval_roc_auc": 0.8616373075259771,
"eval_runtime": 419.8006,
"eval_samples_per_second": 6.875,
"eval_steps_per_second": 0.217,
"learning_rate": 1e-05,
"step": 32214
},
{
"epoch": 119.0,
"eval_accuracy": 0.28274428274428276,
"eval_f1_macro": 0.6608406822787987,
"eval_f1_micro": 0.8036745185622182,
"eval_loss": 0.1319129317998886,
"eval_roc_auc": 0.8678011174197509,
"eval_runtime": 423.7674,
"eval_samples_per_second": 6.81,
"eval_steps_per_second": 0.215,
"learning_rate": 1e-05,
"step": 32487
},
{
"epoch": 119.04761904761905,
"grad_norm": 0.31120315194129944,
"learning_rate": 1e-05,
"loss": 0.1451,
"step": 32500
},
{
"epoch": 120.0,
"eval_accuracy": 0.28135828135828134,
"eval_f1_macro": 0.6614581971670047,
"eval_f1_micro": 0.803593372600534,
"eval_loss": 0.13164088129997253,
"eval_roc_auc": 0.8661674020983411,
"eval_runtime": 420.709,
"eval_samples_per_second": 6.86,
"eval_steps_per_second": 0.216,
"learning_rate": 1e-05,
"step": 32760
},
{
"epoch": 120.87912087912088,
"grad_norm": 0.31770700216293335,
"learning_rate": 1e-05,
"loss": 0.1454,
"step": 33000
},
{
"epoch": 121.0,
"eval_accuracy": 0.28101178101178104,
"eval_f1_macro": 0.6610641151618838,
"eval_f1_micro": 0.8012604863092451,
"eval_loss": 0.13184630870819092,
"eval_roc_auc": 0.8635064611392681,
"eval_runtime": 422.0264,
"eval_samples_per_second": 6.838,
"eval_steps_per_second": 0.216,
"learning_rate": 1e-05,
"step": 33033
},
{
"epoch": 122.0,
"eval_accuracy": 0.2817047817047817,
"eval_f1_macro": 0.6647378818356079,
"eval_f1_micro": 0.8049611099432415,
"eval_loss": 0.13215216994285583,
"eval_roc_auc": 0.8691576105910745,
"eval_runtime": 436.9114,
"eval_samples_per_second": 6.605,
"eval_steps_per_second": 0.208,
"learning_rate": 1e-05,
"step": 33306
},
{
"epoch": 122.71062271062272,
"grad_norm": 0.22290275990962982,
"learning_rate": 1e-05,
"loss": 0.145,
"step": 33500
},
{
"epoch": 123.0,
"eval_accuracy": 0.2817047817047817,
"eval_f1_macro": 0.6604978306251739,
"eval_f1_micro": 0.8010107932156931,
"eval_loss": 0.13187836110591888,
"eval_roc_auc": 0.8617537926061216,
"eval_runtime": 431.3938,
"eval_samples_per_second": 6.69,
"eval_steps_per_second": 0.211,
"learning_rate": 1e-05,
"step": 33579
},
{
"epoch": 124.0,
"eval_accuracy": 0.2806652806652807,
"eval_f1_macro": 0.6621515776947642,
"eval_f1_micro": 0.8018739352640545,
"eval_loss": 0.13141389191150665,
"eval_roc_auc": 0.8638029186192627,
"eval_runtime": 430.2675,
"eval_samples_per_second": 6.707,
"eval_steps_per_second": 0.211,
"learning_rate": 1e-05,
"step": 33852
},
{
"epoch": 124.54212454212454,
"grad_norm": 0.27631625533103943,
"learning_rate": 1e-05,
"loss": 0.1459,
"step": 34000
},
{
"epoch": 125.0,
"eval_accuracy": 0.2862092862092862,
"eval_f1_macro": 0.6640721616133445,
"eval_f1_micro": 0.804345987993574,
"eval_loss": 0.13139639794826508,
"eval_roc_auc": 0.8672404491355638,
"eval_runtime": 432.0509,
"eval_samples_per_second": 6.68,
"eval_steps_per_second": 0.211,
"learning_rate": 1e-05,
"step": 34125
},
{
"epoch": 126.0,
"eval_accuracy": 0.2862092862092862,
"eval_f1_macro": 0.663003919720051,
"eval_f1_micro": 0.804212663367593,
"eval_loss": 0.13103623688220978,
"eval_roc_auc": 0.8670350710768244,
"eval_runtime": 432.4499,
"eval_samples_per_second": 6.674,
"eval_steps_per_second": 0.21,
"learning_rate": 1e-05,
"step": 34398
},
{
"epoch": 126.37362637362638,
"grad_norm": 0.3177105188369751,
"learning_rate": 1e-05,
"loss": 0.1439,
"step": 34500
},
{
"epoch": 127.0,
"eval_accuracy": 0.28586278586278585,
"eval_f1_macro": 0.6597731906072118,
"eval_f1_micro": 0.8038346213944846,
"eval_loss": 0.13152988255023956,
"eval_roc_auc": 0.8672624342859965,
"eval_runtime": 431.3827,
"eval_samples_per_second": 6.69,
"eval_steps_per_second": 0.211,
"learning_rate": 1e-05,
"step": 34671
},
{
"epoch": 128.0,
"eval_accuracy": 0.2869022869022869,
"eval_f1_macro": 0.668197478893632,
"eval_f1_micro": 0.8042412977357216,
"eval_loss": 0.13113313913345337,
"eval_roc_auc": 0.8674002874836755,
"eval_runtime": 439.4627,
"eval_samples_per_second": 6.567,
"eval_steps_per_second": 0.207,
"learning_rate": 1e-05,
"step": 34944
},
{
"epoch": 128.2051282051282,
"grad_norm": 0.2520149350166321,
"learning_rate": 1e-05,
"loss": 0.1446,
"step": 35000
},
{
"epoch": 129.0,
"eval_accuracy": 0.28274428274428276,
"eval_f1_macro": 0.6652814888251478,
"eval_f1_micro": 0.8034694309287074,
"eval_loss": 0.13096605241298676,
"eval_roc_auc": 0.8665332355380903,
"eval_runtime": 443.7844,
"eval_samples_per_second": 6.503,
"eval_steps_per_second": 0.205,
"learning_rate": 1e-05,
"step": 35217
},
{
"epoch": 130.0,
"eval_accuracy": 0.28655578655578656,
"eval_f1_macro": 0.6657375892895663,
"eval_f1_micro": 0.8034491503931017,
"eval_loss": 0.1310083270072937,
"eval_roc_auc": 0.866799015752045,
"eval_runtime": 440.6588,
"eval_samples_per_second": 6.549,
"eval_steps_per_second": 0.207,
"learning_rate": 1e-05,
"step": 35490
},
{
"epoch": 130.03663003663004,
"grad_norm": 0.2916598916053772,
"learning_rate": 1e-05,
"loss": 0.1449,
"step": 35500
},
{
"epoch": 131.0,
"eval_accuracy": 0.2834372834372834,
"eval_f1_macro": 0.6709132204127336,
"eval_f1_micro": 0.8052362171687506,
"eval_loss": 0.13133247196674347,
"eval_roc_auc": 0.8699004377177725,
"eval_runtime": 446.7612,
"eval_samples_per_second": 6.46,
"eval_steps_per_second": 0.204,
"learning_rate": 1e-05,
"step": 35763
},
{
"epoch": 131.86813186813185,
"grad_norm": 0.3473760783672333,
"learning_rate": 1e-05,
"loss": 0.1442,
"step": 36000
},
{
"epoch": 132.0,
"eval_accuracy": 0.2806652806652807,
"eval_f1_macro": 0.6557913726655867,
"eval_f1_micro": 0.7985562048814026,
"eval_loss": 0.13149647414684296,
"eval_roc_auc": 0.8595249758820619,
"eval_runtime": 447.0484,
"eval_samples_per_second": 6.456,
"eval_steps_per_second": 0.204,
"learning_rate": 1e-05,
"step": 36036
},
{
"epoch": 133.0,
"eval_accuracy": 0.28794178794178793,
"eval_f1_macro": 0.6689392948255155,
"eval_f1_micro": 0.8051816958277256,
"eval_loss": 0.1311328113079071,
"eval_roc_auc": 0.8691700049040701,
"eval_runtime": 444.1217,
"eval_samples_per_second": 6.498,
"eval_steps_per_second": 0.205,
"learning_rate": 1e-05,
"step": 36309
},
{
"epoch": 133.6996336996337,
"grad_norm": 0.2959079444408417,
"learning_rate": 1e-05,
"loss": 0.1443,
"step": 36500
},
{
"epoch": 134.0,
"eval_accuracy": 0.28274428274428276,
"eval_f1_macro": 0.6648386499372343,
"eval_f1_micro": 0.802060714437774,
"eval_loss": 0.1308571696281433,
"eval_roc_auc": 0.8639881626262637,
"eval_runtime": 444.917,
"eval_samples_per_second": 6.487,
"eval_steps_per_second": 0.205,
"learning_rate": 1e-05,
"step": 36582
},
{
"epoch": 135.0,
"eval_accuracy": 0.2869022869022869,
"eval_f1_macro": 0.6684163123065296,
"eval_f1_micro": 0.8038277511961722,
"eval_loss": 0.13148072361946106,
"eval_roc_auc": 0.8665118674205556,
"eval_runtime": 437.5153,
"eval_samples_per_second": 6.596,
"eval_steps_per_second": 0.208,
"learning_rate": 1e-05,
"step": 36855
},
{
"epoch": 135.53113553113553,
"grad_norm": 0.3723543882369995,
"learning_rate": 1e-05,
"loss": 0.1438,
"step": 37000
},
{
"epoch": 136.0,
"eval_accuracy": 0.28274428274428276,
"eval_f1_macro": 0.659009971789042,
"eval_f1_micro": 0.8024591213764248,
"eval_loss": 0.13150115311145782,
"eval_roc_auc": 0.8634352340808195,
"eval_runtime": 444.5109,
"eval_samples_per_second": 6.493,
"eval_steps_per_second": 0.205,
"learning_rate": 1e-05,
"step": 37128
},
{
"epoch": 137.0,
"eval_accuracy": 0.28586278586278585,
"eval_f1_macro": 0.6666808903899752,
"eval_f1_micro": 0.8035592643051771,
"eval_loss": 0.1310679018497467,
"eval_roc_auc": 0.8648124783367798,
"eval_runtime": 434.2661,
"eval_samples_per_second": 6.646,
"eval_steps_per_second": 0.21,
"learning_rate": 1e-05,
"step": 37401
},
{
"epoch": 137.36263736263737,
"grad_norm": 0.36766815185546875,
"learning_rate": 1e-05,
"loss": 0.1452,
"step": 37500
},
{
"epoch": 138.0,
"eval_accuracy": 0.2844767844767845,
"eval_f1_macro": 0.6665598962110765,
"eval_f1_micro": 0.8035426731078905,
"eval_loss": 0.13124705851078033,
"eval_roc_auc": 0.8661277510277622,
"eval_runtime": 434.1413,
"eval_samples_per_second": 6.648,
"eval_steps_per_second": 0.21,
"learning_rate": 1e-05,
"step": 37674
},
{
"epoch": 139.0,
"eval_accuracy": 0.28967428967428965,
"eval_f1_macro": 0.6661043989752415,
"eval_f1_micro": 0.8052538519828238,
"eval_loss": 0.13104070723056793,
"eval_roc_auc": 0.8689438757606943,
"eval_runtime": 433.2581,
"eval_samples_per_second": 6.661,
"eval_steps_per_second": 0.21,
"learning_rate": 1e-05,
"step": 37947
},
{
"epoch": 139.19413919413918,
"grad_norm": 0.35373228788375854,
"learning_rate": 1e-05,
"loss": 0.144,
"step": 38000
},
{
"epoch": 140.0,
"eval_accuracy": 0.2834372834372834,
"eval_f1_macro": 0.663466069531375,
"eval_f1_micro": 0.8020416843896214,
"eval_loss": 0.13169734179973602,
"eval_roc_auc": 0.8642539428402185,
"eval_runtime": 435.0147,
"eval_samples_per_second": 6.634,
"eval_steps_per_second": 0.209,
"learning_rate": 1e-05,
"step": 38220
},
{
"epoch": 141.0,
"eval_accuracy": 0.2875952875952876,
"eval_f1_macro": 0.6687691213000826,
"eval_f1_micro": 0.8046521463311481,
"eval_loss": 0.13089434802532196,
"eval_roc_auc": 0.867299000192085,
"eval_runtime": 429.8469,
"eval_samples_per_second": 6.714,
"eval_steps_per_second": 0.212,
"learning_rate": 1.0000000000000002e-06,
"step": 38493
},
{
"epoch": 141.02564102564102,
"grad_norm": 0.2815115451812744,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1445,
"step": 38500
},
{
"epoch": 142.0,
"eval_accuracy": 0.28586278586278585,
"eval_f1_macro": 0.6642894279153319,
"eval_f1_micro": 0.8041640110473762,
"eval_loss": 0.13103386759757996,
"eval_roc_auc": 0.8657067870399482,
"eval_runtime": 425.5573,
"eval_samples_per_second": 6.782,
"eval_steps_per_second": 0.214,
"learning_rate": 1.0000000000000002e-06,
"step": 38766
},
{
"epoch": 142.85714285714286,
"grad_norm": 0.3381010890007019,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1441,
"step": 39000
},
{
"epoch": 143.0,
"eval_accuracy": 0.2872487872487873,
"eval_f1_macro": 0.6623287859816251,
"eval_f1_micro": 0.8019270122783083,
"eval_loss": 0.13144278526306152,
"eval_roc_auc": 0.8635436440782548,
"eval_runtime": 433.7658,
"eval_samples_per_second": 6.653,
"eval_steps_per_second": 0.21,
"learning_rate": 1.0000000000000002e-06,
"step": 39039
},
{
"epoch": 144.0,
"eval_accuracy": 0.28378378378378377,
"eval_f1_macro": 0.6647534218687892,
"eval_f1_micro": 0.8024974515800204,
"eval_loss": 0.1311902105808258,
"eval_roc_auc": 0.8649097280870156,
"eval_runtime": 446.8955,
"eval_samples_per_second": 6.458,
"eval_steps_per_second": 0.204,
"learning_rate": 1.0000000000000002e-06,
"step": 39312
},
{
"epoch": 144.0,
"learning_rate": 1.0000000000000002e-06,
"step": 39312,
"total_flos": 1.3598709030716368e+20,
"train_loss": 0.157796386979584,
"train_runtime": 249885.5342,
"train_samples_per_second": 5.232,
"train_steps_per_second": 0.164
}
],
"logging_steps": 500,
"max_steps": 40950,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3598709030716368e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}