{ "best_metric": 0.1308571696281433, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-small-2024_08_31-batch-size32_epochs150_freeze/checkpoint-36582", "epoch": 144.0, "eval_steps": 500, "global_step": 39312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.19057519057519057, "eval_f1_macro": 0.4058921954514261, "eval_f1_micro": 0.7088941673264713, "eval_loss": 0.19568666815757751, "eval_roc_auc": 0.8060676064167129, "eval_runtime": 426.0483, "eval_samples_per_second": 6.774, "eval_steps_per_second": 0.214, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.30737248063087463, "learning_rate": 0.001, "loss": 0.3189, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.21933471933471935, "eval_f1_macro": 0.4867943512801917, "eval_f1_micro": 0.738139514768845, "eval_loss": 0.17198018729686737, "eval_roc_auc": 0.8255075095586444, "eval_runtime": 425.0166, "eval_samples_per_second": 6.79, "eval_steps_per_second": 0.214, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.23215523215523215, "eval_f1_macro": 0.5587016500092944, "eval_f1_micro": 0.7578947368421052, "eval_loss": 0.16209888458251953, "eval_roc_auc": 0.8387630797560628, "eval_runtime": 425.9119, "eval_samples_per_second": 6.776, "eval_steps_per_second": 0.214, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.2619726359844208, "learning_rate": 0.001, "loss": 0.1897, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.22487872487872487, "eval_f1_macro": 0.5561953540051209, "eval_f1_micro": 0.7463059684835497, "eval_loss": 0.15948981046676636, "eval_roc_auc": 0.8221271753092407, "eval_runtime": 423.9484, "eval_samples_per_second": 6.807, "eval_steps_per_second": 0.215, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.23146223146223147, "eval_f1_macro": 0.5723046956548954, "eval_f1_micro": 0.7510718113612004, "eval_loss": 0.15691693127155304, "eval_roc_auc": 0.8244935635420478, "eval_runtime": 423.6041, "eval_samples_per_second": 6.813, "eval_steps_per_second": 0.215, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.17114631831645966, "learning_rate": 0.001, "loss": 0.1808, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.2363132363132363, "eval_f1_macro": 0.5786669115862841, "eval_f1_micro": 0.7634727923836142, "eval_loss": 0.15302371978759766, "eval_roc_auc": 0.8365257318814997, "eval_runtime": 427.5566, "eval_samples_per_second": 6.75, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.23354123354123354, "eval_f1_macro": 0.5981729145672101, "eval_f1_micro": 0.7651630269613162, "eval_loss": 0.1523299366235733, "eval_roc_auc": 0.838924594824006, "eval_runtime": 430.1478, "eval_samples_per_second": 6.709, "eval_steps_per_second": 0.212, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.22214488685131073, "learning_rate": 0.001, "loss": 0.1763, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.24185724185724186, "eval_f1_macro": 0.587992292024695, "eval_f1_micro": 0.7655172413793103, "eval_loss": 0.15311872959136963, "eval_roc_auc": 0.837740052624858, "eval_runtime": 427.9308, "eval_samples_per_second": 6.744, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.24012474012474014, "eval_f1_macro": 0.606908576330327, "eval_f1_micro": 0.7699542669773061, "eval_loss": 0.14992575347423553, "eval_roc_auc": 0.8431046707780733, "eval_runtime": 424.0382, "eval_samples_per_second": 6.806, "eval_steps_per_second": 0.215, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.1733015924692154, "learning_rate": 0.001, "loss": 0.1735, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.24393624393624394, "eval_f1_macro": 0.5829080312220596, "eval_f1_micro": 0.7606115107913669, "eval_loss": 0.1509619951248169, "eval_roc_auc": 0.8277441062627229, "eval_runtime": 424.8811, "eval_samples_per_second": 6.792, "eval_steps_per_second": 0.214, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.16356830298900604, "learning_rate": 0.001, "loss": 0.1723, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.2505197505197505, "eval_f1_macro": 0.5976223089766404, "eval_f1_micro": 0.7689559002963221, "eval_loss": 0.1520717293024063, "eval_roc_auc": 0.8399853012032679, "eval_runtime": 434.5331, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.2442827442827443, "eval_f1_macro": 0.607405900640871, "eval_f1_micro": 0.7759986516096409, "eval_loss": 0.15027731657028198, "eval_roc_auc": 0.8526551998703694, "eval_runtime": 434.0545, "eval_samples_per_second": 6.649, "eval_steps_per_second": 0.21, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.1642971783876419, "learning_rate": 0.001, "loss": 0.1719, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.24393624393624394, "eval_f1_macro": 0.6003271512523337, "eval_f1_micro": 0.7623558852444365, "eval_loss": 0.1504218876361847, "eval_roc_auc": 0.8301696089299148, "eval_runtime": 426.4716, "eval_samples_per_second": 6.767, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.24462924462924462, "eval_f1_macro": 0.602811285040826, "eval_f1_micro": 0.7644358114073813, "eval_loss": 0.1496724784374237, "eval_roc_auc": 0.8342951177137805, "eval_runtime": 428.909, "eval_samples_per_second": 6.729, "eval_steps_per_second": 0.212, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.1759812980890274, "learning_rate": 0.001, "loss": 0.1702, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2512127512127512, "eval_f1_macro": 0.6066013767027806, "eval_f1_micro": 0.7751615281210703, "eval_loss": 0.14749661087989807, "eval_roc_auc": 0.8445581856657356, "eval_runtime": 424.6732, "eval_samples_per_second": 6.796, "eval_steps_per_second": 0.214, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.24636174636174638, "eval_f1_macro": 0.5838354990739413, "eval_f1_micro": 0.7645565108923241, "eval_loss": 0.14998775720596313, "eval_roc_auc": 0.8320747114163963, "eval_runtime": 423.7704, "eval_samples_per_second": 6.81, "eval_steps_per_second": 0.215, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.14804692566394806, "learning_rate": 0.001, "loss": 0.1696, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.24566874566874566, "eval_f1_macro": 0.6073459016890155, "eval_f1_micro": 0.7719883641341547, "eval_loss": 0.15297245979309082, "eval_roc_auc": 0.8464322218871764, "eval_runtime": 424.9885, "eval_samples_per_second": 6.791, "eval_steps_per_second": 0.214, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.24393624393624394, "eval_f1_macro": 0.614324753279198, "eval_f1_micro": 0.7751951282271207, "eval_loss": 0.14907290041446686, "eval_roc_auc": 0.8475019020709771, "eval_runtime": 420.1647, "eval_samples_per_second": 6.869, "eval_steps_per_second": 0.217, "learning_rate": 0.001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 0.19223743677139282, "learning_rate": 0.001, "loss": 0.1717, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.23458073458073458, "eval_f1_macro": 0.6075499214740471, "eval_f1_micro": 0.7739734788726388, "eval_loss": 0.14951026439666748, "eval_roc_auc": 0.848377592477135, "eval_runtime": 427.9682, "eval_samples_per_second": 6.743, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.24532224532224534, "eval_f1_macro": 0.595638442008225, "eval_f1_micro": 0.7636993911381718, "eval_loss": 0.14873762428760529, "eval_roc_auc": 0.8322311292560515, "eval_runtime": 421.5059, "eval_samples_per_second": 6.847, "eval_steps_per_second": 0.216, "learning_rate": 0.001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.15787707269191742, "learning_rate": 0.001, "loss": 0.1705, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.24740124740124741, "eval_f1_macro": 0.6164990545073296, "eval_f1_micro": 0.780452718426063, "eval_loss": 0.14705629646778107, "eval_roc_auc": 0.8539786012990958, "eval_runtime": 429.6596, "eval_samples_per_second": 6.717, "eval_steps_per_second": 0.212, "learning_rate": 0.001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.15392103791236877, "learning_rate": 0.001, "loss": 0.1706, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.24532224532224534, "eval_f1_macro": 0.6073576225776433, "eval_f1_micro": 0.7753641707130079, "eval_loss": 0.1508719027042389, "eval_roc_auc": 0.8494150259851333, "eval_runtime": 429.7216, "eval_samples_per_second": 6.716, "eval_steps_per_second": 0.212, "learning_rate": 0.001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.2428967428967429, "eval_f1_macro": 0.6127152502703448, "eval_f1_micro": 0.771920553133395, "eval_loss": 0.15015815198421478, "eval_roc_auc": 0.8388299205154317, "eval_runtime": 426.6602, "eval_samples_per_second": 6.764, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.1737624853849411, "learning_rate": 0.001, "loss": 0.1699, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.24012474012474014, "eval_f1_macro": 0.5849380548549015, "eval_f1_micro": 0.7698941591532732, "eval_loss": 0.14965225756168365, "eval_roc_auc": 0.8406060899537385, "eval_runtime": 430.4521, "eval_samples_per_second": 6.705, "eval_steps_per_second": 0.211, "learning_rate": 0.001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.24255024255024255, "eval_f1_macro": 0.6035289549510865, "eval_f1_micro": 0.7761348897535668, "eval_loss": 0.14702074229717255, "eval_roc_auc": 0.8458632504863829, "eval_runtime": 428.0693, "eval_samples_per_second": 6.742, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.1737377792596817, "learning_rate": 0.001, "loss": 0.1694, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.24220374220374222, "eval_f1_macro": 0.6064603919289959, "eval_f1_micro": 0.7751430907604253, "eval_loss": 0.14808295667171478, "eval_roc_auc": 0.8465518457868458, "eval_runtime": 438.4341, "eval_samples_per_second": 6.583, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.24740124740124741, "eval_f1_macro": 0.6135774018658996, "eval_f1_micro": 0.7689308343302761, "eval_loss": 0.14581289887428284, "eval_roc_auc": 0.8357120666953542, "eval_runtime": 426.6923, "eval_samples_per_second": 6.764, "eval_steps_per_second": 0.213, "learning_rate": 0.001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.16500511765480042, "learning_rate": 0.001, "loss": 0.17, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.24462924462924462, "eval_f1_macro": 0.6077297645661711, "eval_f1_micro": 0.7751325049960902, "eval_loss": 0.1453842669725418, "eval_roc_auc": 0.8440532649625113, "eval_runtime": 431.4145, "eval_samples_per_second": 6.69, "eval_steps_per_second": 0.211, "learning_rate": 0.001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.24566874566874566, "eval_f1_macro": 0.6107922701154117, "eval_f1_micro": 0.7735191637630662, "eval_loss": 0.14941243827342987, "eval_roc_auc": 0.849050708300112, "eval_runtime": 434.9588, "eval_samples_per_second": 6.635, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.1599486619234085, "learning_rate": 0.001, "loss": 0.1685, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.24982674982674982, "eval_f1_macro": 0.5982833860845571, "eval_f1_micro": 0.7705324709843182, "eval_loss": 0.14549985527992249, "eval_roc_auc": 0.8366026732011344, "eval_runtime": 434.3329, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.21, "learning_rate": 0.001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.2532917532917533, "eval_f1_macro": 0.6068619458731248, "eval_f1_micro": 0.7784728768532008, "eval_loss": 0.14541107416152954, "eval_roc_auc": 0.8494949988142239, "eval_runtime": 435.6219, "eval_samples_per_second": 6.625, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.1950293928384781, "learning_rate": 0.001, "loss": 0.1687, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.24532224532224534, "eval_f1_macro": 0.6145316287096297, "eval_f1_micro": 0.7746102833519939, "eval_loss": 0.14657220244407654, "eval_roc_auc": 0.8460955499587395, "eval_runtime": 434.8949, "eval_samples_per_second": 6.636, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.18405263125896454, "learning_rate": 0.001, "loss": 0.1679, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.253984753984754, "eval_f1_macro": 0.6124691593400795, "eval_f1_micro": 0.777031154551008, "eval_loss": 0.14459234476089478, "eval_roc_auc": 0.843919167617255, "eval_runtime": 440.1591, "eval_samples_per_second": 6.557, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.24462924462924462, "eval_f1_macro": 0.6168054796129936, "eval_f1_micro": 0.7781283769180896, "eval_loss": 0.1468168944120407, "eval_roc_auc": 0.8469846407097918, "eval_runtime": 438.6105, "eval_samples_per_second": 6.58, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.17146140336990356, "learning_rate": 0.001, "loss": 0.168, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.2494802494802495, "eval_f1_macro": 0.6193343400891848, "eval_f1_micro": 0.7766880749869814, "eval_loss": 0.14858707785606384, "eval_roc_auc": 0.8451765062846143, "eval_runtime": 434.5802, "eval_samples_per_second": 6.641, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.24878724878724878, "eval_f1_macro": 0.6092667253949349, "eval_f1_micro": 0.7718835224773468, "eval_loss": 0.14637114107608795, "eval_roc_auc": 0.8391158347811251, "eval_runtime": 439.3197, "eval_samples_per_second": 6.569, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.16876503825187683, "learning_rate": 0.001, "loss": 0.169, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.24982674982674982, "eval_f1_macro": 0.6127183895875491, "eval_f1_micro": 0.7733602776435442, "eval_loss": 0.1448281705379486, "eval_roc_auc": 0.8402195590843876, "eval_runtime": 437.3035, "eval_samples_per_second": 6.6, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.25225225225225223, "eval_f1_macro": 0.6109962510638844, "eval_f1_micro": 0.7814896880859042, "eval_loss": 0.1450735628604889, "eval_roc_auc": 0.8526187412743501, "eval_runtime": 437.7229, "eval_samples_per_second": 6.593, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.19475676119327545, "learning_rate": 0.001, "loss": 0.167, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.24982674982674982, "eval_f1_macro": 0.6272196317832909, "eval_f1_micro": 0.7824146207942057, "eval_loss": 0.14469724893569946, "eval_roc_auc": 0.8563424677452759, "eval_runtime": 435.4486, "eval_samples_per_second": 6.628, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.25363825363825365, "eval_f1_macro": 0.6265963634718456, "eval_f1_micro": 0.7836651178652115, "eval_loss": 0.14824891090393066, "eval_roc_auc": 0.853692740688437, "eval_runtime": 435.8824, "eval_samples_per_second": 6.621, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.15533967316150665, "learning_rate": 0.0001, "loss": 0.1652, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.2616077616077616, "eval_f1_macro": 0.6323784470247855, "eval_f1_micro": 0.7833456473553827, "eval_loss": 0.14141727983951569, "eval_roc_auc": 0.8483120796798727, "eval_runtime": 435.7344, "eval_samples_per_second": 6.623, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.26195426195426197, "eval_f1_macro": 0.6371841233046203, "eval_f1_micro": 0.7884351407000686, "eval_loss": 0.13979895412921906, "eval_roc_auc": 0.8545567611245666, "eval_runtime": 438.4508, "eval_samples_per_second": 6.582, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.1733330935239792, "learning_rate": 0.0001, "loss": 0.1608, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.26403326403326405, "eval_f1_macro": 0.6366820358518588, "eval_f1_micro": 0.7871061893724783, "eval_loss": 0.14107641577720642, "eval_roc_auc": 0.853678548931782, "eval_runtime": 434.1211, "eval_samples_per_second": 6.648, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.19694675505161285, "learning_rate": 0.0001, "loss": 0.1596, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.26126126126126126, "eval_f1_macro": 0.6256922069455233, "eval_f1_micro": 0.787878787878788, "eval_loss": 0.13898694515228271, "eval_roc_auc": 0.8537086091649239, "eval_runtime": 434.0073, "eval_samples_per_second": 6.65, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.2664587664587665, "eval_f1_macro": 0.6421056073559387, "eval_f1_micro": 0.7894011202068074, "eval_loss": 0.13859130442142487, "eval_roc_auc": 0.8538817942028954, "eval_runtime": 432.4865, "eval_samples_per_second": 6.673, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.18810147047042847, "learning_rate": 0.0001, "loss": 0.1582, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.2664587664587665, "eval_f1_macro": 0.6283048537279357, "eval_f1_micro": 0.7873893327575039, "eval_loss": 0.139601469039917, "eval_roc_auc": 0.8521625527563127, "eval_runtime": 421.9429, "eval_samples_per_second": 6.84, "eval_steps_per_second": 0.216, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.2636867636867637, "eval_f1_macro": 0.6286555138094179, "eval_f1_micro": 0.7863567238757333, "eval_loss": 0.13869330286979675, "eval_roc_auc": 0.8499808451526433, "eval_runtime": 424.0306, "eval_samples_per_second": 6.806, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.15351006388664246, "learning_rate": 0.0001, "loss": 0.1584, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.26784476784476785, "eval_f1_macro": 0.6334934953582803, "eval_f1_micro": 0.7913177234660741, "eval_loss": 0.13777127861976624, "eval_roc_auc": 0.8571892112602602, "eval_runtime": 419.9652, "eval_samples_per_second": 6.872, "eval_steps_per_second": 0.217, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.26403326403326405, "eval_f1_macro": 0.6381777921693204, "eval_f1_micro": 0.7933989479042932, "eval_loss": 0.1377096027135849, "eval_roc_auc": 0.8602965218660363, "eval_runtime": 431.2306, "eval_samples_per_second": 6.692, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.1798904836177826, "learning_rate": 0.0001, "loss": 0.157, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.2674982674982675, "eval_f1_macro": 0.6362718007605523, "eval_f1_micro": 0.7918342891380639, "eval_loss": 0.13755330443382263, "eval_roc_auc": 0.8570210161405075, "eval_runtime": 429.5809, "eval_samples_per_second": 6.718, "eval_steps_per_second": 0.212, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.2661122661122661, "eval_f1_macro": 0.6426825970872383, "eval_f1_micro": 0.7928808087673094, "eval_loss": 0.13754987716674805, "eval_roc_auc": 0.8596608706709776, "eval_runtime": 429.3766, "eval_samples_per_second": 6.721, "eval_steps_per_second": 0.212, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.20376506447792053, "learning_rate": 0.0001, "loss": 0.1567, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.26576576576576577, "eval_f1_macro": 0.6367912909960436, "eval_f1_micro": 0.7871186146434616, "eval_loss": 0.13771678507328033, "eval_roc_auc": 0.8506886757830149, "eval_runtime": 424.3804, "eval_samples_per_second": 6.801, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.2692307692307692, "eval_f1_macro": 0.640555047060403, "eval_f1_micro": 0.7928592630284527, "eval_loss": 0.13740690052509308, "eval_roc_auc": 0.8601326459765699, "eval_runtime": 434.4832, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.16348811984062195, "learning_rate": 0.0001, "loss": 0.1571, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.27165627165627165, "eval_f1_macro": 0.6412320555565514, "eval_f1_micro": 0.7920979171140219, "eval_loss": 0.1368684023618698, "eval_roc_auc": 0.8562094300869534, "eval_runtime": 425.2932, "eval_samples_per_second": 6.786, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.20431332290172577, "learning_rate": 0.0001, "loss": 0.1548, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.2702702702702703, "eval_f1_macro": 0.6377616721633446, "eval_f1_micro": 0.7914089347079037, "eval_loss": 0.13703426718711853, "eval_roc_auc": 0.8557803910164303, "eval_runtime": 424.9893, "eval_samples_per_second": 6.791, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.2643797643797644, "eval_f1_macro": 0.6425003998141597, "eval_f1_micro": 0.7931107623128156, "eval_loss": 0.1364637017250061, "eval_roc_auc": 0.8601515459625123, "eval_runtime": 423.7139, "eval_samples_per_second": 6.811, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.19714656472206116, "learning_rate": 0.0001, "loss": 0.155, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.2674982674982675, "eval_f1_macro": 0.6381793578718891, "eval_f1_micro": 0.7926408585665006, "eval_loss": 0.13675515353679657, "eval_roc_auc": 0.8588114846455387, "eval_runtime": 426.4919, "eval_samples_per_second": 6.767, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.2674982674982675, "eval_f1_macro": 0.637380953089336, "eval_f1_micro": 0.791562634524322, "eval_loss": 0.1364695280790329, "eval_roc_auc": 0.855274853280308, "eval_runtime": 425.8426, "eval_samples_per_second": 6.777, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.19042669236660004, "learning_rate": 0.0001, "loss": 0.155, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.2674982674982675, "eval_f1_macro": 0.6428884521567982, "eval_f1_micro": 0.7922245108135942, "eval_loss": 0.13641765713691711, "eval_roc_auc": 0.8565012329926954, "eval_runtime": 423.8693, "eval_samples_per_second": 6.809, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.26507276507276506, "eval_f1_macro": 0.6357999016219877, "eval_f1_micro": 0.7882888744307093, "eval_loss": 0.13687649369239807, "eval_roc_auc": 0.8514745744887481, "eval_runtime": 423.4928, "eval_samples_per_second": 6.815, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.18568764626979828, "learning_rate": 0.0001, "loss": 0.1546, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.2713097713097713, "eval_f1_macro": 0.6503848519713329, "eval_f1_micro": 0.7945638702508654, "eval_loss": 0.13638463616371155, "eval_roc_auc": 0.8588833823919201, "eval_runtime": 425.9119, "eval_samples_per_second": 6.776, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.2751212751212751, "eval_f1_macro": 0.6441767594174573, "eval_f1_micro": 0.7931640039405492, "eval_loss": 0.13563227653503418, "eval_roc_auc": 0.8575138778747027, "eval_runtime": 422.0661, "eval_samples_per_second": 6.838, "eval_steps_per_second": 0.216, "learning_rate": 0.0001, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.19402863085269928, "learning_rate": 0.0001, "loss": 0.1536, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.27373527373527373, "eval_f1_macro": 0.6515952055035917, "eval_f1_micro": 0.7966116124638174, "eval_loss": 0.1355270892381668, "eval_roc_auc": 0.8610939161629354, "eval_runtime": 426.9279, "eval_samples_per_second": 6.76, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.26784476784476785, "eval_f1_macro": 0.6450040026439422, "eval_f1_micro": 0.7934075342465754, "eval_loss": 0.13592010736465454, "eval_roc_auc": 0.8577985580745997, "eval_runtime": 426.0816, "eval_samples_per_second": 6.773, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.22000150382518768, "learning_rate": 0.0001, "loss": 0.1544, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.27061677061677064, "eval_f1_macro": 0.64551501310817, "eval_f1_micro": 0.7936467053015668, "eval_loss": 0.13569533824920654, "eval_roc_auc": 0.857159821715051, "eval_runtime": 424.6551, "eval_samples_per_second": 6.796, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.19799016416072845, "learning_rate": 0.0001, "loss": 0.1529, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.2713097713097713, "eval_f1_macro": 0.6477176853690674, "eval_f1_micro": 0.794643237940888, "eval_loss": 0.13565082848072052, "eval_roc_auc": 0.8594942449609874, "eval_runtime": 425.0795, "eval_samples_per_second": 6.789, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6544361257862924, "eval_f1_micro": 0.7965922095536813, "eval_loss": 0.13533934950828552, "eval_roc_auc": 0.8622831129363361, "eval_runtime": 424.6762, "eval_samples_per_second": 6.796, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.2619948983192444, "learning_rate": 0.0001, "loss": 0.1528, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.2733887733887734, "eval_f1_macro": 0.6519486064773884, "eval_f1_micro": 0.7955772910907932, "eval_loss": 0.1353396475315094, "eval_roc_auc": 0.8608058154545816, "eval_runtime": 421.8067, "eval_samples_per_second": 6.842, "eval_steps_per_second": 0.216, "learning_rate": 0.0001, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.26992376992376993, "eval_f1_macro": 0.6515714856354324, "eval_f1_micro": 0.7966188524590164, "eval_loss": 0.13474246859550476, "eval_roc_auc": 0.8602900698481241, "eval_runtime": 423.2901, "eval_samples_per_second": 6.818, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.18048201501369476, "learning_rate": 0.0001, "loss": 0.1528, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.272002772002772, "eval_f1_macro": 0.6441608871918139, "eval_f1_micro": 0.7944687795241776, "eval_loss": 0.13504748046398163, "eval_roc_auc": 0.8574953132327267, "eval_runtime": 423.3844, "eval_samples_per_second": 6.817, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.27234927234927236, "eval_f1_macro": 0.6441889860402124, "eval_f1_micro": 0.7933057280883367, "eval_loss": 0.13502468168735504, "eval_roc_auc": 0.8556664277229126, "eval_runtime": 422.6912, "eval_samples_per_second": 6.828, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.24162879586219788, "learning_rate": 0.0001, "loss": 0.1522, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.2758142758142758, "eval_f1_macro": 0.6484748365424647, "eval_f1_micro": 0.7969950486597234, "eval_loss": 0.1344645917415619, "eval_roc_auc": 0.8605409876174911, "eval_runtime": 426.5755, "eval_samples_per_second": 6.766, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.27616077616077617, "eval_f1_macro": 0.6518769914193778, "eval_f1_micro": 0.7977006599957419, "eval_loss": 0.1341526359319687, "eval_roc_auc": 0.8616010233088203, "eval_runtime": 420.7226, "eval_samples_per_second": 6.86, "eval_steps_per_second": 0.216, "learning_rate": 0.0001, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.22451983392238617, "learning_rate": 0.0001, "loss": 0.1523, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.2751212751212751, "eval_f1_macro": 0.641334935505441, "eval_f1_micro": 0.7914797229603171, "eval_loss": 0.13499116897583008, "eval_roc_auc": 0.8520198169504839, "eval_runtime": 428.7922, "eval_samples_per_second": 6.731, "eval_steps_per_second": 0.212, "learning_rate": 0.0001, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.2751212751212751, "eval_f1_macro": 0.6485229770180625, "eval_f1_micro": 0.7946678133734681, "eval_loss": 0.13461369276046753, "eval_roc_auc": 0.8572354216588205, "eval_runtime": 427.8784, "eval_samples_per_second": 6.745, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.22029711306095123, "learning_rate": 0.0001, "loss": 0.1521, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.2758142758142758, "eval_f1_macro": 0.6478195810395848, "eval_f1_micro": 0.7964594201659113, "eval_loss": 0.13438266515731812, "eval_roc_auc": 0.8597526207801657, "eval_runtime": 424.3142, "eval_samples_per_second": 6.802, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.2415299415588379, "learning_rate": 0.0001, "loss": 0.1515, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.27754677754677753, "eval_f1_macro": 0.6536737916153181, "eval_f1_micro": 0.7977742853502102, "eval_loss": 0.13460540771484375, "eval_roc_auc": 0.8623314561225224, "eval_runtime": 422.8083, "eval_samples_per_second": 6.826, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.27754677754677753, "eval_f1_macro": 0.6543115985953537, "eval_f1_micro": 0.7978169818504888, "eval_loss": 0.13411369919776917, "eval_roc_auc": 0.8634738791194995, "eval_runtime": 428.5067, "eval_samples_per_second": 6.735, "eval_steps_per_second": 0.212, "learning_rate": 0.0001, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.2636328637599945, "learning_rate": 0.0001, "loss": 0.1514, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.2740817740817741, "eval_f1_macro": 0.6523004018612216, "eval_f1_micro": 0.7953020134228188, "eval_loss": 0.13399606943130493, "eval_roc_auc": 0.8574454542918126, "eval_runtime": 436.7976, "eval_samples_per_second": 6.607, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.27823977823977825, "eval_f1_macro": 0.6545582038870168, "eval_f1_micro": 0.7993085420355848, "eval_loss": 0.1344238668680191, "eval_roc_auc": 0.8652547567870936, "eval_runtime": 431.9941, "eval_samples_per_second": 6.681, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.23601791262626648, "learning_rate": 0.0001, "loss": 0.1516, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.2758142758142758, "eval_f1_macro": 0.6559691700651434, "eval_f1_micro": 0.7966715529878418, "eval_loss": 0.13405664265155792, "eval_roc_auc": 0.8575861109650502, "eval_runtime": 436.6356, "eval_samples_per_second": 6.61, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.2765072765072765, "eval_f1_macro": 0.6453669674995801, "eval_f1_micro": 0.7947541551246537, "eval_loss": 0.13407430052757263, "eval_roc_auc": 0.8554945304057716, "eval_runtime": 436.5794, "eval_samples_per_second": 6.61, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.19588124752044678, "learning_rate": 0.0001, "loss": 0.149, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.2702702702702703, "eval_f1_macro": 0.645966570658811, "eval_f1_micro": 0.7924365020985678, "eval_loss": 0.1350804716348648, "eval_roc_auc": 0.8543412288505268, "eval_runtime": 433.6987, "eval_samples_per_second": 6.654, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6512285101875886, "eval_f1_micro": 0.7957293542577825, "eval_loss": 0.13387472927570343, "eval_roc_auc": 0.8585996545688873, "eval_runtime": 432.4386, "eval_samples_per_second": 6.674, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.2560372054576874, "learning_rate": 0.0001, "loss": 0.1515, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.27927927927927926, "eval_f1_macro": 0.6531817491521362, "eval_f1_micro": 0.7990622335890879, "eval_loss": 0.13341927528381348, "eval_roc_auc": 0.8620406055936447, "eval_runtime": 432.3488, "eval_samples_per_second": 6.675, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.2747747747747748, "eval_f1_macro": 0.6595866427349153, "eval_f1_micro": 0.7988261313371896, "eval_loss": 0.13337253034114838, "eval_roc_auc": 0.8625331319838734, "eval_runtime": 435.2436, "eval_samples_per_second": 6.631, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.28640052676200867, "learning_rate": 0.0001, "loss": 0.1495, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.27442827442827444, "eval_f1_macro": 0.6467323251879672, "eval_f1_micro": 0.7956179390619651, "eval_loss": 0.1339845359325409, "eval_roc_auc": 0.8590850582532711, "eval_runtime": 438.7375, "eval_samples_per_second": 6.578, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.23546907305717468, "learning_rate": 0.0001, "loss": 0.1496, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.2747747747747748, "eval_f1_macro": 0.648318545746826, "eval_f1_micro": 0.7981612326551459, "eval_loss": 0.13357459008693695, "eval_roc_auc": 0.8619578829440303, "eval_runtime": 432.3449, "eval_samples_per_second": 6.675, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.2806652806652807, "eval_f1_macro": 0.6585340844298272, "eval_f1_micro": 0.8014968675104065, "eval_loss": 0.13366733491420746, "eval_roc_auc": 0.8672320387088881, "eval_runtime": 431.6296, "eval_samples_per_second": 6.686, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.24246211349964142, "learning_rate": 0.0001, "loss": 0.1493, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.2772002772002772, "eval_f1_macro": 0.66211749340029, "eval_f1_micro": 0.8010798042854732, "eval_loss": 0.1332736760377884, "eval_roc_auc": 0.8661044781564988, "eval_runtime": 425.5723, "eval_samples_per_second": 6.781, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.27823977823977825, "eval_f1_macro": 0.6528573832362276, "eval_f1_micro": 0.7956933454403943, "eval_loss": 0.13367226719856262, "eval_roc_auc": 0.8562680347985093, "eval_runtime": 443.8961, "eval_samples_per_second": 6.502, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 24843 }, { "epoch": 91.57509157509158, "grad_norm": 0.22026851773262024, "learning_rate": 0.0001, "loss": 0.1496, "step": 25000 }, { "epoch": 92.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6513649424471982, "eval_f1_micro": 0.796086375587259, "eval_loss": 0.13348612189292908, "eval_roc_auc": 0.8573559442803198, "eval_runtime": 443.9031, "eval_samples_per_second": 6.501, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 25116 }, { "epoch": 93.0, "eval_accuracy": 0.2758142758142758, "eval_f1_macro": 0.6559763883082907, "eval_f1_micro": 0.8001861094662043, "eval_loss": 0.1330718696117401, "eval_roc_auc": 0.8648260530605368, "eval_runtime": 436.5725, "eval_samples_per_second": 6.611, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 25389 }, { "epoch": 93.4065934065934, "grad_norm": 0.28630152344703674, "learning_rate": 0.0001, "loss": 0.1493, "step": 25500 }, { "epoch": 94.0, "eval_accuracy": 0.2758142758142758, "eval_f1_macro": 0.6553585917255438, "eval_f1_micro": 0.7995090362720617, "eval_loss": 0.13329002261161804, "eval_roc_auc": 0.864277443745379, "eval_runtime": 442.8808, "eval_samples_per_second": 6.516, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 25662 }, { "epoch": 95.0, "eval_accuracy": 0.2758142758142758, "eval_f1_macro": 0.6579543710907207, "eval_f1_micro": 0.7979651162790697, "eval_loss": 0.13314621150493622, "eval_roc_auc": 0.8606367216129991, "eval_runtime": 436.3942, "eval_samples_per_second": 6.613, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 25935 }, { "epoch": 95.23809523809524, "grad_norm": 0.25194719433784485, "learning_rate": 0.0001, "loss": 0.1482, "step": 26000 }, { "epoch": 96.0, "eval_accuracy": 0.2751212751212751, "eval_f1_macro": 0.6556445954379041, "eval_f1_micro": 0.7992523999660183, "eval_loss": 0.13279949128627777, "eval_roc_auc": 0.8631226264354063, "eval_runtime": 426.8086, "eval_samples_per_second": 6.762, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 26208 }, { "epoch": 97.0, "eval_accuracy": 0.27823977823977825, "eval_f1_macro": 0.6492741904723621, "eval_f1_micro": 0.7977296181630549, "eval_loss": 0.1332886964082718, "eval_roc_auc": 0.8588905587527994, "eval_runtime": 441.9848, "eval_samples_per_second": 6.53, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 26481 }, { "epoch": 97.06959706959707, "grad_norm": 0.27280953526496887, "learning_rate": 0.0001, "loss": 0.1497, "step": 26500 }, { "epoch": 98.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6600105762308898, "eval_f1_micro": 0.799611141637432, "eval_loss": 0.13266970217227936, "eval_roc_auc": 0.864715456620441, "eval_runtime": 439.781, "eval_samples_per_second": 6.562, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 26754 }, { "epoch": 98.9010989010989, "grad_norm": 0.30599892139434814, "learning_rate": 0.0001, "loss": 0.1489, "step": 27000 }, { "epoch": 99.0, "eval_accuracy": 0.27165627165627165, "eval_f1_macro": 0.6589970862385839, "eval_f1_micro": 0.7978809757764771, "eval_loss": 0.13253149390220642, "eval_roc_auc": 0.8607699202364255, "eval_runtime": 438.5456, "eval_samples_per_second": 6.581, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 27027 }, { "epoch": 100.0, "eval_accuracy": 0.27616077616077617, "eval_f1_macro": 0.6570195655430786, "eval_f1_micro": 0.797143840330351, "eval_loss": 0.1329408884048462, "eval_roc_auc": 0.8584810367011169, "eval_runtime": 434.9771, "eval_samples_per_second": 6.635, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 27300 }, { "epoch": 100.73260073260073, "grad_norm": 0.2732805013656616, "learning_rate": 0.0001, "loss": 0.1482, "step": 27500 }, { "epoch": 101.0, "eval_accuracy": 0.28205128205128205, "eval_f1_macro": 0.657951499975745, "eval_f1_micro": 0.7991615690636095, "eval_loss": 0.13274870812892914, "eval_roc_auc": 0.861103560655407, "eval_runtime": 435.4493, "eval_samples_per_second": 6.628, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 27573 }, { "epoch": 102.0, "eval_accuracy": 0.2817047817047817, "eval_f1_macro": 0.654306822863844, "eval_f1_micro": 0.7986821274228745, "eval_loss": 0.1326293796300888, "eval_roc_auc": 0.8607733407448822, "eval_runtime": 437.9645, "eval_samples_per_second": 6.59, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 27846 }, { "epoch": 102.56410256410257, "grad_norm": 0.23533137142658234, "learning_rate": 0.0001, "loss": 0.1474, "step": 28000 }, { "epoch": 103.0, "eval_accuracy": 0.2803187803187803, "eval_f1_macro": 0.6518495856500403, "eval_f1_micro": 0.7993688968487486, "eval_loss": 0.13247379660606384, "eval_roc_auc": 0.8620991566501659, "eval_runtime": 426.0566, "eval_samples_per_second": 6.774, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 28119 }, { "epoch": 104.0, "eval_accuracy": 0.27754677754677753, "eval_f1_macro": 0.6612536009112525, "eval_f1_micro": 0.8010850676047981, "eval_loss": 0.13315415382385254, "eval_roc_auc": 0.864729420343199, "eval_runtime": 425.2679, "eval_samples_per_second": 6.786, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 28392 }, { "epoch": 104.3956043956044, "grad_norm": 0.2809629738330841, "learning_rate": 0.0001, "loss": 0.1472, "step": 28500 }, { "epoch": 105.0, "eval_accuracy": 0.2830907830907831, "eval_f1_macro": 0.6635718544409769, "eval_f1_micro": 0.8012698412698412, "eval_loss": 0.13218620419502258, "eval_roc_auc": 0.8652135899617869, "eval_runtime": 425.1586, "eval_samples_per_second": 6.788, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 28665 }, { "epoch": 106.0, "eval_accuracy": 0.2830907830907831, "eval_f1_macro": 0.6588128942023547, "eval_f1_micro": 0.800988243312319, "eval_loss": 0.13239973783493042, "eval_roc_auc": 0.8632750603887415, "eval_runtime": 427.5404, "eval_samples_per_second": 6.75, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 28938 }, { "epoch": 106.22710622710623, "grad_norm": 0.2568123936653137, "learning_rate": 0.0001, "loss": 0.148, "step": 29000 }, { "epoch": 107.0, "eval_accuracy": 0.2785862785862786, "eval_f1_macro": 0.650564106362156, "eval_f1_micro": 0.7985513421389007, "eval_loss": 0.13358280062675476, "eval_roc_auc": 0.8618832353771251, "eval_runtime": 425.2874, "eval_samples_per_second": 6.786, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 29211 }, { "epoch": 108.0, "eval_accuracy": 0.2796257796257796, "eval_f1_macro": 0.6501303094783896, "eval_f1_micro": 0.7995554225623049, "eval_loss": 0.13270235061645508, "eval_roc_auc": 0.8615071940670409, "eval_runtime": 432.9179, "eval_samples_per_second": 6.666, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 29484 }, { "epoch": 108.05860805860806, "grad_norm": 0.29480934143066406, "learning_rate": 0.0001, "loss": 0.1477, "step": 29500 }, { "epoch": 109.0, "eval_accuracy": 0.2806652806652807, "eval_f1_macro": 0.6579556871315007, "eval_f1_micro": 0.8000342553738118, "eval_loss": 0.1318453699350357, "eval_roc_auc": 0.8612993478767093, "eval_runtime": 434.6895, "eval_samples_per_second": 6.639, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 29757 }, { "epoch": 109.89010989010988, "grad_norm": 0.3718918561935425, "learning_rate": 0.0001, "loss": 0.1479, "step": 30000 }, { "epoch": 110.0, "eval_accuracy": 0.2803187803187803, "eval_f1_macro": 0.6582487839550253, "eval_f1_micro": 0.7997274043785672, "eval_loss": 0.13255637884140015, "eval_roc_auc": 0.8626158546334878, "eval_runtime": 427.7015, "eval_samples_per_second": 6.748, "eval_steps_per_second": 0.213, "learning_rate": 0.0001, "step": 30030 }, { "epoch": 111.0, "eval_accuracy": 0.2785862785862786, "eval_f1_macro": 0.6608614747058748, "eval_f1_micro": 0.8012935069355799, "eval_loss": 0.1319260448217392, "eval_roc_auc": 0.8637521073014844, "eval_runtime": 422.4227, "eval_samples_per_second": 6.832, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 30303 }, { "epoch": 111.72161172161172, "grad_norm": 0.3544025719165802, "learning_rate": 0.0001, "loss": 0.1466, "step": 30500 }, { "epoch": 112.0, "eval_accuracy": 0.28101178101178104, "eval_f1_macro": 0.6595016342799644, "eval_f1_micro": 0.8019278738426415, "eval_loss": 0.13223350048065186, "eval_roc_auc": 0.8659084092462648, "eval_runtime": 420.8235, "eval_samples_per_second": 6.858, "eval_steps_per_second": 0.216, "learning_rate": 0.0001, "step": 30576 }, { "epoch": 113.0, "eval_accuracy": 0.27997227997227997, "eval_f1_macro": 0.6592029124671744, "eval_f1_micro": 0.8024988392216453, "eval_loss": 0.13213913142681122, "eval_roc_auc": 0.8666766420318518, "eval_runtime": 423.8949, "eval_samples_per_second": 6.808, "eval_steps_per_second": 0.215, "learning_rate": 0.0001, "step": 30849 }, { "epoch": 113.55311355311355, "grad_norm": 0.35069116950035095, "learning_rate": 0.0001, "loss": 0.1474, "step": 31000 }, { "epoch": 114.0, "eval_accuracy": 0.2823977823977824, "eval_f1_macro": 0.663088095209859, "eval_f1_micro": 0.8025030654094965, "eval_loss": 0.13204564154148102, "eval_roc_auc": 0.8661983610533127, "eval_runtime": 421.2287, "eval_samples_per_second": 6.851, "eval_steps_per_second": 0.216, "learning_rate": 0.0001, "step": 31122 }, { "epoch": 115.0, "eval_accuracy": 0.28378378378378377, "eval_f1_macro": 0.659797224924612, "eval_f1_micro": 0.8004266211604096, "eval_loss": 0.1319342404603958, "eval_roc_auc": 0.8625399730007867, "eval_runtime": 424.6871, "eval_samples_per_second": 6.796, "eval_steps_per_second": 0.214, "learning_rate": 0.0001, "step": 31395 }, { "epoch": 115.38461538461539, "grad_norm": 0.29624369740486145, "learning_rate": 1e-05, "loss": 0.1468, "step": 31500 }, { "epoch": 116.0, "eval_accuracy": 0.2844767844767845, "eval_f1_macro": 0.6627361818946377, "eval_f1_micro": 0.8022295974810655, "eval_loss": 0.13186337053775787, "eval_roc_auc": 0.8642598314802673, "eval_runtime": 423.8673, "eval_samples_per_second": 6.809, "eval_steps_per_second": 0.215, "learning_rate": 1e-05, "step": 31668 }, { "epoch": 117.0, "eval_accuracy": 0.28205128205128205, "eval_f1_macro": 0.6604165936303265, "eval_f1_micro": 0.8012607547491268, "eval_loss": 0.1317850947380066, "eval_roc_auc": 0.8634466760169507, "eval_runtime": 419.012, "eval_samples_per_second": 6.888, "eval_steps_per_second": 0.217, "learning_rate": 1e-05, "step": 31941 }, { "epoch": 117.21611721611721, "grad_norm": 0.28633400797843933, "learning_rate": 1e-05, "loss": 0.1455, "step": 32000 }, { "epoch": 118.0, "eval_accuracy": 0.2796257796257796, "eval_f1_macro": 0.6590147410119703, "eval_f1_micro": 0.8002395926924228, "eval_loss": 0.13159342110157013, "eval_roc_auc": 0.8616373075259771, "eval_runtime": 419.8006, "eval_samples_per_second": 6.875, "eval_steps_per_second": 0.217, "learning_rate": 1e-05, "step": 32214 }, { "epoch": 119.0, "eval_accuracy": 0.28274428274428276, "eval_f1_macro": 0.6608406822787987, "eval_f1_micro": 0.8036745185622182, "eval_loss": 0.1319129317998886, "eval_roc_auc": 0.8678011174197509, "eval_runtime": 423.7674, "eval_samples_per_second": 6.81, "eval_steps_per_second": 0.215, "learning_rate": 1e-05, "step": 32487 }, { "epoch": 119.04761904761905, "grad_norm": 0.31120315194129944, "learning_rate": 1e-05, "loss": 0.1451, "step": 32500 }, { "epoch": 120.0, "eval_accuracy": 0.28135828135828134, "eval_f1_macro": 0.6614581971670047, "eval_f1_micro": 0.803593372600534, "eval_loss": 0.13164088129997253, "eval_roc_auc": 0.8661674020983411, "eval_runtime": 420.709, "eval_samples_per_second": 6.86, "eval_steps_per_second": 0.216, "learning_rate": 1e-05, "step": 32760 }, { "epoch": 120.87912087912088, "grad_norm": 0.31770700216293335, "learning_rate": 1e-05, "loss": 0.1454, "step": 33000 }, { "epoch": 121.0, "eval_accuracy": 0.28101178101178104, "eval_f1_macro": 0.6610641151618838, "eval_f1_micro": 0.8012604863092451, "eval_loss": 0.13184630870819092, "eval_roc_auc": 0.8635064611392681, "eval_runtime": 422.0264, "eval_samples_per_second": 6.838, "eval_steps_per_second": 0.216, "learning_rate": 1e-05, "step": 33033 }, { "epoch": 122.0, "eval_accuracy": 0.2817047817047817, "eval_f1_macro": 0.6647378818356079, "eval_f1_micro": 0.8049611099432415, "eval_loss": 0.13215216994285583, "eval_roc_auc": 0.8691576105910745, "eval_runtime": 436.9114, "eval_samples_per_second": 6.605, "eval_steps_per_second": 0.208, "learning_rate": 1e-05, "step": 33306 }, { "epoch": 122.71062271062272, "grad_norm": 0.22290275990962982, "learning_rate": 1e-05, "loss": 0.145, "step": 33500 }, { "epoch": 123.0, "eval_accuracy": 0.2817047817047817, "eval_f1_macro": 0.6604978306251739, "eval_f1_micro": 0.8010107932156931, "eval_loss": 0.13187836110591888, "eval_roc_auc": 0.8617537926061216, "eval_runtime": 431.3938, "eval_samples_per_second": 6.69, "eval_steps_per_second": 0.211, "learning_rate": 1e-05, "step": 33579 }, { "epoch": 124.0, "eval_accuracy": 0.2806652806652807, "eval_f1_macro": 0.6621515776947642, "eval_f1_micro": 0.8018739352640545, "eval_loss": 0.13141389191150665, "eval_roc_auc": 0.8638029186192627, "eval_runtime": 430.2675, "eval_samples_per_second": 6.707, "eval_steps_per_second": 0.211, "learning_rate": 1e-05, "step": 33852 }, { "epoch": 124.54212454212454, "grad_norm": 0.27631625533103943, "learning_rate": 1e-05, "loss": 0.1459, "step": 34000 }, { "epoch": 125.0, "eval_accuracy": 0.2862092862092862, "eval_f1_macro": 0.6640721616133445, "eval_f1_micro": 0.804345987993574, "eval_loss": 0.13139639794826508, "eval_roc_auc": 0.8672404491355638, "eval_runtime": 432.0509, "eval_samples_per_second": 6.68, "eval_steps_per_second": 0.211, "learning_rate": 1e-05, "step": 34125 }, { "epoch": 126.0, "eval_accuracy": 0.2862092862092862, "eval_f1_macro": 0.663003919720051, "eval_f1_micro": 0.804212663367593, "eval_loss": 0.13103623688220978, "eval_roc_auc": 0.8670350710768244, "eval_runtime": 432.4499, "eval_samples_per_second": 6.674, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 34398 }, { "epoch": 126.37362637362638, "grad_norm": 0.3177105188369751, "learning_rate": 1e-05, "loss": 0.1439, "step": 34500 }, { "epoch": 127.0, "eval_accuracy": 0.28586278586278585, "eval_f1_macro": 0.6597731906072118, "eval_f1_micro": 0.8038346213944846, "eval_loss": 0.13152988255023956, "eval_roc_auc": 0.8672624342859965, "eval_runtime": 431.3827, "eval_samples_per_second": 6.69, "eval_steps_per_second": 0.211, "learning_rate": 1e-05, "step": 34671 }, { "epoch": 128.0, "eval_accuracy": 0.2869022869022869, "eval_f1_macro": 0.668197478893632, "eval_f1_micro": 0.8042412977357216, "eval_loss": 0.13113313913345337, "eval_roc_auc": 0.8674002874836755, "eval_runtime": 439.4627, "eval_samples_per_second": 6.567, "eval_steps_per_second": 0.207, "learning_rate": 1e-05, "step": 34944 }, { "epoch": 128.2051282051282, "grad_norm": 0.2520149350166321, "learning_rate": 1e-05, "loss": 0.1446, "step": 35000 }, { "epoch": 129.0, "eval_accuracy": 0.28274428274428276, "eval_f1_macro": 0.6652814888251478, "eval_f1_micro": 0.8034694309287074, "eval_loss": 0.13096605241298676, "eval_roc_auc": 0.8665332355380903, "eval_runtime": 443.7844, "eval_samples_per_second": 6.503, "eval_steps_per_second": 0.205, "learning_rate": 1e-05, "step": 35217 }, { "epoch": 130.0, "eval_accuracy": 0.28655578655578656, "eval_f1_macro": 0.6657375892895663, "eval_f1_micro": 0.8034491503931017, "eval_loss": 0.1310083270072937, "eval_roc_auc": 0.866799015752045, "eval_runtime": 440.6588, "eval_samples_per_second": 6.549, "eval_steps_per_second": 0.207, "learning_rate": 1e-05, "step": 35490 }, { "epoch": 130.03663003663004, "grad_norm": 0.2916598916053772, "learning_rate": 1e-05, "loss": 0.1449, "step": 35500 }, { "epoch": 131.0, "eval_accuracy": 0.2834372834372834, "eval_f1_macro": 0.6709132204127336, "eval_f1_micro": 0.8052362171687506, "eval_loss": 0.13133247196674347, "eval_roc_auc": 0.8699004377177725, "eval_runtime": 446.7612, "eval_samples_per_second": 6.46, "eval_steps_per_second": 0.204, "learning_rate": 1e-05, "step": 35763 }, { "epoch": 131.86813186813185, "grad_norm": 0.3473760783672333, "learning_rate": 1e-05, "loss": 0.1442, "step": 36000 }, { "epoch": 132.0, "eval_accuracy": 0.2806652806652807, "eval_f1_macro": 0.6557913726655867, "eval_f1_micro": 0.7985562048814026, "eval_loss": 0.13149647414684296, "eval_roc_auc": 0.8595249758820619, "eval_runtime": 447.0484, "eval_samples_per_second": 6.456, "eval_steps_per_second": 0.204, "learning_rate": 1e-05, "step": 36036 }, { "epoch": 133.0, "eval_accuracy": 0.28794178794178793, "eval_f1_macro": 0.6689392948255155, "eval_f1_micro": 0.8051816958277256, "eval_loss": 0.1311328113079071, "eval_roc_auc": 0.8691700049040701, "eval_runtime": 444.1217, "eval_samples_per_second": 6.498, "eval_steps_per_second": 0.205, "learning_rate": 1e-05, "step": 36309 }, { "epoch": 133.6996336996337, "grad_norm": 0.2959079444408417, "learning_rate": 1e-05, "loss": 0.1443, "step": 36500 }, { "epoch": 134.0, "eval_accuracy": 0.28274428274428276, "eval_f1_macro": 0.6648386499372343, "eval_f1_micro": 0.802060714437774, "eval_loss": 0.1308571696281433, "eval_roc_auc": 0.8639881626262637, "eval_runtime": 444.917, "eval_samples_per_second": 6.487, "eval_steps_per_second": 0.205, "learning_rate": 1e-05, "step": 36582 }, { "epoch": 135.0, "eval_accuracy": 0.2869022869022869, "eval_f1_macro": 0.6684163123065296, "eval_f1_micro": 0.8038277511961722, "eval_loss": 0.13148072361946106, "eval_roc_auc": 0.8665118674205556, "eval_runtime": 437.5153, "eval_samples_per_second": 6.596, "eval_steps_per_second": 0.208, "learning_rate": 1e-05, "step": 36855 }, { "epoch": 135.53113553113553, "grad_norm": 0.3723543882369995, "learning_rate": 1e-05, "loss": 0.1438, "step": 37000 }, { "epoch": 136.0, "eval_accuracy": 0.28274428274428276, "eval_f1_macro": 0.659009971789042, "eval_f1_micro": 0.8024591213764248, "eval_loss": 0.13150115311145782, "eval_roc_auc": 0.8634352340808195, "eval_runtime": 444.5109, "eval_samples_per_second": 6.493, "eval_steps_per_second": 0.205, "learning_rate": 1e-05, "step": 37128 }, { "epoch": 137.0, "eval_accuracy": 0.28586278586278585, "eval_f1_macro": 0.6666808903899752, "eval_f1_micro": 0.8035592643051771, "eval_loss": 0.1310679018497467, "eval_roc_auc": 0.8648124783367798, "eval_runtime": 434.2661, "eval_samples_per_second": 6.646, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 37401 }, { "epoch": 137.36263736263737, "grad_norm": 0.36766815185546875, "learning_rate": 1e-05, "loss": 0.1452, "step": 37500 }, { "epoch": 138.0, "eval_accuracy": 0.2844767844767845, "eval_f1_macro": 0.6665598962110765, "eval_f1_micro": 0.8035426731078905, "eval_loss": 0.13124705851078033, "eval_roc_auc": 0.8661277510277622, "eval_runtime": 434.1413, "eval_samples_per_second": 6.648, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 37674 }, { "epoch": 139.0, "eval_accuracy": 0.28967428967428965, "eval_f1_macro": 0.6661043989752415, "eval_f1_micro": 0.8052538519828238, "eval_loss": 0.13104070723056793, "eval_roc_auc": 0.8689438757606943, "eval_runtime": 433.2581, "eval_samples_per_second": 6.661, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 37947 }, { "epoch": 139.19413919413918, "grad_norm": 0.35373228788375854, "learning_rate": 1e-05, "loss": 0.144, "step": 38000 }, { "epoch": 140.0, "eval_accuracy": 0.2834372834372834, "eval_f1_macro": 0.663466069531375, "eval_f1_micro": 0.8020416843896214, "eval_loss": 0.13169734179973602, "eval_roc_auc": 0.8642539428402185, "eval_runtime": 435.0147, "eval_samples_per_second": 6.634, "eval_steps_per_second": 0.209, "learning_rate": 1e-05, "step": 38220 }, { "epoch": 141.0, "eval_accuracy": 0.2875952875952876, "eval_f1_macro": 0.6687691213000826, "eval_f1_micro": 0.8046521463311481, "eval_loss": 0.13089434802532196, "eval_roc_auc": 0.867299000192085, "eval_runtime": 429.8469, "eval_samples_per_second": 6.714, "eval_steps_per_second": 0.212, "learning_rate": 1.0000000000000002e-06, "step": 38493 }, { "epoch": 141.02564102564102, "grad_norm": 0.2815115451812744, "learning_rate": 1.0000000000000002e-06, "loss": 0.1445, "step": 38500 }, { "epoch": 142.0, "eval_accuracy": 0.28586278586278585, "eval_f1_macro": 0.6642894279153319, "eval_f1_micro": 0.8041640110473762, "eval_loss": 0.13103386759757996, "eval_roc_auc": 0.8657067870399482, "eval_runtime": 425.5573, "eval_samples_per_second": 6.782, "eval_steps_per_second": 0.214, "learning_rate": 1.0000000000000002e-06, "step": 38766 }, { "epoch": 142.85714285714286, "grad_norm": 0.3381010890007019, "learning_rate": 1.0000000000000002e-06, "loss": 0.1441, "step": 39000 }, { "epoch": 143.0, "eval_accuracy": 0.2872487872487873, "eval_f1_macro": 0.6623287859816251, "eval_f1_micro": 0.8019270122783083, "eval_loss": 0.13144278526306152, "eval_roc_auc": 0.8635436440782548, "eval_runtime": 433.7658, "eval_samples_per_second": 6.653, "eval_steps_per_second": 0.21, "learning_rate": 1.0000000000000002e-06, "step": 39039 }, { "epoch": 144.0, "eval_accuracy": 0.28378378378378377, "eval_f1_macro": 0.6647534218687892, "eval_f1_micro": 0.8024974515800204, "eval_loss": 0.1311902105808258, "eval_roc_auc": 0.8649097280870156, "eval_runtime": 446.8955, "eval_samples_per_second": 6.458, "eval_steps_per_second": 0.204, "learning_rate": 1.0000000000000002e-06, "step": 39312 }, { "epoch": 144.0, "learning_rate": 1.0000000000000002e-06, "step": 39312, "total_flos": 1.3598709030716368e+20, "train_loss": 0.157796386979584, "train_runtime": 249885.5342, "train_samples_per_second": 5.232, "train_steps_per_second": 0.164 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3598709030716368e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }