|
{ |
|
"best_metric": 0.1308571696281433, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-small-2024_08_31-batch-size32_epochs150_freeze/checkpoint-36582", |
|
"epoch": 144.0, |
|
"eval_steps": 500, |
|
"global_step": 39312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.19057519057519057, |
|
"eval_f1_macro": 0.4058921954514261, |
|
"eval_f1_micro": 0.7088941673264713, |
|
"eval_loss": 0.19568666815757751, |
|
"eval_roc_auc": 0.8060676064167129, |
|
"eval_runtime": 426.0483, |
|
"eval_samples_per_second": 6.774, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.30737248063087463, |
|
"learning_rate": 0.001, |
|
"loss": 0.3189, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.21933471933471935, |
|
"eval_f1_macro": 0.4867943512801917, |
|
"eval_f1_micro": 0.738139514768845, |
|
"eval_loss": 0.17198018729686737, |
|
"eval_roc_auc": 0.8255075095586444, |
|
"eval_runtime": 425.0166, |
|
"eval_samples_per_second": 6.79, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.23215523215523215, |
|
"eval_f1_macro": 0.5587016500092944, |
|
"eval_f1_micro": 0.7578947368421052, |
|
"eval_loss": 0.16209888458251953, |
|
"eval_roc_auc": 0.8387630797560628, |
|
"eval_runtime": 425.9119, |
|
"eval_samples_per_second": 6.776, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.2619726359844208, |
|
"learning_rate": 0.001, |
|
"loss": 0.1897, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.22487872487872487, |
|
"eval_f1_macro": 0.5561953540051209, |
|
"eval_f1_micro": 0.7463059684835497, |
|
"eval_loss": 0.15948981046676636, |
|
"eval_roc_auc": 0.8221271753092407, |
|
"eval_runtime": 423.9484, |
|
"eval_samples_per_second": 6.807, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.23146223146223147, |
|
"eval_f1_macro": 0.5723046956548954, |
|
"eval_f1_micro": 0.7510718113612004, |
|
"eval_loss": 0.15691693127155304, |
|
"eval_roc_auc": 0.8244935635420478, |
|
"eval_runtime": 423.6041, |
|
"eval_samples_per_second": 6.813, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.17114631831645966, |
|
"learning_rate": 0.001, |
|
"loss": 0.1808, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.2363132363132363, |
|
"eval_f1_macro": 0.5786669115862841, |
|
"eval_f1_micro": 0.7634727923836142, |
|
"eval_loss": 0.15302371978759766, |
|
"eval_roc_auc": 0.8365257318814997, |
|
"eval_runtime": 427.5566, |
|
"eval_samples_per_second": 6.75, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.23354123354123354, |
|
"eval_f1_macro": 0.5981729145672101, |
|
"eval_f1_micro": 0.7651630269613162, |
|
"eval_loss": 0.1523299366235733, |
|
"eval_roc_auc": 0.838924594824006, |
|
"eval_runtime": 430.1478, |
|
"eval_samples_per_second": 6.709, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.22214488685131073, |
|
"learning_rate": 0.001, |
|
"loss": 0.1763, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.24185724185724186, |
|
"eval_f1_macro": 0.587992292024695, |
|
"eval_f1_micro": 0.7655172413793103, |
|
"eval_loss": 0.15311872959136963, |
|
"eval_roc_auc": 0.837740052624858, |
|
"eval_runtime": 427.9308, |
|
"eval_samples_per_second": 6.744, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.24012474012474014, |
|
"eval_f1_macro": 0.606908576330327, |
|
"eval_f1_micro": 0.7699542669773061, |
|
"eval_loss": 0.14992575347423553, |
|
"eval_roc_auc": 0.8431046707780733, |
|
"eval_runtime": 424.0382, |
|
"eval_samples_per_second": 6.806, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.1733015924692154, |
|
"learning_rate": 0.001, |
|
"loss": 0.1735, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.24393624393624394, |
|
"eval_f1_macro": 0.5829080312220596, |
|
"eval_f1_micro": 0.7606115107913669, |
|
"eval_loss": 0.1509619951248169, |
|
"eval_roc_auc": 0.8277441062627229, |
|
"eval_runtime": 424.8811, |
|
"eval_samples_per_second": 6.792, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.16356830298900604, |
|
"learning_rate": 0.001, |
|
"loss": 0.1723, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.2505197505197505, |
|
"eval_f1_macro": 0.5976223089766404, |
|
"eval_f1_micro": 0.7689559002963221, |
|
"eval_loss": 0.1520717293024063, |
|
"eval_roc_auc": 0.8399853012032679, |
|
"eval_runtime": 434.5331, |
|
"eval_samples_per_second": 6.642, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.2442827442827443, |
|
"eval_f1_macro": 0.607405900640871, |
|
"eval_f1_micro": 0.7759986516096409, |
|
"eval_loss": 0.15027731657028198, |
|
"eval_roc_auc": 0.8526551998703694, |
|
"eval_runtime": 434.0545, |
|
"eval_samples_per_second": 6.649, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.1642971783876419, |
|
"learning_rate": 0.001, |
|
"loss": 0.1719, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.24393624393624394, |
|
"eval_f1_macro": 0.6003271512523337, |
|
"eval_f1_micro": 0.7623558852444365, |
|
"eval_loss": 0.1504218876361847, |
|
"eval_roc_auc": 0.8301696089299148, |
|
"eval_runtime": 426.4716, |
|
"eval_samples_per_second": 6.767, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.24462924462924462, |
|
"eval_f1_macro": 0.602811285040826, |
|
"eval_f1_micro": 0.7644358114073813, |
|
"eval_loss": 0.1496724784374237, |
|
"eval_roc_auc": 0.8342951177137805, |
|
"eval_runtime": 428.909, |
|
"eval_samples_per_second": 6.729, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.1759812980890274, |
|
"learning_rate": 0.001, |
|
"loss": 0.1702, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2512127512127512, |
|
"eval_f1_macro": 0.6066013767027806, |
|
"eval_f1_micro": 0.7751615281210703, |
|
"eval_loss": 0.14749661087989807, |
|
"eval_roc_auc": 0.8445581856657356, |
|
"eval_runtime": 424.6732, |
|
"eval_samples_per_second": 6.796, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.24636174636174638, |
|
"eval_f1_macro": 0.5838354990739413, |
|
"eval_f1_micro": 0.7645565108923241, |
|
"eval_loss": 0.14998775720596313, |
|
"eval_roc_auc": 0.8320747114163963, |
|
"eval_runtime": 423.7704, |
|
"eval_samples_per_second": 6.81, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.14804692566394806, |
|
"learning_rate": 0.001, |
|
"loss": 0.1696, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.24566874566874566, |
|
"eval_f1_macro": 0.6073459016890155, |
|
"eval_f1_micro": 0.7719883641341547, |
|
"eval_loss": 0.15297245979309082, |
|
"eval_roc_auc": 0.8464322218871764, |
|
"eval_runtime": 424.9885, |
|
"eval_samples_per_second": 6.791, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.24393624393624394, |
|
"eval_f1_macro": 0.614324753279198, |
|
"eval_f1_micro": 0.7751951282271207, |
|
"eval_loss": 0.14907290041446686, |
|
"eval_roc_auc": 0.8475019020709771, |
|
"eval_runtime": 420.1647, |
|
"eval_samples_per_second": 6.869, |
|
"eval_steps_per_second": 0.217, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.19223743677139282, |
|
"learning_rate": 0.001, |
|
"loss": 0.1717, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.23458073458073458, |
|
"eval_f1_macro": 0.6075499214740471, |
|
"eval_f1_micro": 0.7739734788726388, |
|
"eval_loss": 0.14951026439666748, |
|
"eval_roc_auc": 0.848377592477135, |
|
"eval_runtime": 427.9682, |
|
"eval_samples_per_second": 6.743, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.24532224532224534, |
|
"eval_f1_macro": 0.595638442008225, |
|
"eval_f1_micro": 0.7636993911381718, |
|
"eval_loss": 0.14873762428760529, |
|
"eval_roc_auc": 0.8322311292560515, |
|
"eval_runtime": 421.5059, |
|
"eval_samples_per_second": 6.847, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.15787707269191742, |
|
"learning_rate": 0.001, |
|
"loss": 0.1705, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.24740124740124741, |
|
"eval_f1_macro": 0.6164990545073296, |
|
"eval_f1_micro": 0.780452718426063, |
|
"eval_loss": 0.14705629646778107, |
|
"eval_roc_auc": 0.8539786012990958, |
|
"eval_runtime": 429.6596, |
|
"eval_samples_per_second": 6.717, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.15392103791236877, |
|
"learning_rate": 0.001, |
|
"loss": 0.1706, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.24532224532224534, |
|
"eval_f1_macro": 0.6073576225776433, |
|
"eval_f1_micro": 0.7753641707130079, |
|
"eval_loss": 0.1508719027042389, |
|
"eval_roc_auc": 0.8494150259851333, |
|
"eval_runtime": 429.7216, |
|
"eval_samples_per_second": 6.716, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2428967428967429, |
|
"eval_f1_macro": 0.6127152502703448, |
|
"eval_f1_micro": 0.771920553133395, |
|
"eval_loss": 0.15015815198421478, |
|
"eval_roc_auc": 0.8388299205154317, |
|
"eval_runtime": 426.6602, |
|
"eval_samples_per_second": 6.764, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.1737624853849411, |
|
"learning_rate": 0.001, |
|
"loss": 0.1699, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.24012474012474014, |
|
"eval_f1_macro": 0.5849380548549015, |
|
"eval_f1_micro": 0.7698941591532732, |
|
"eval_loss": 0.14965225756168365, |
|
"eval_roc_auc": 0.8406060899537385, |
|
"eval_runtime": 430.4521, |
|
"eval_samples_per_second": 6.705, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.24255024255024255, |
|
"eval_f1_macro": 0.6035289549510865, |
|
"eval_f1_micro": 0.7761348897535668, |
|
"eval_loss": 0.14702074229717255, |
|
"eval_roc_auc": 0.8458632504863829, |
|
"eval_runtime": 428.0693, |
|
"eval_samples_per_second": 6.742, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.1737377792596817, |
|
"learning_rate": 0.001, |
|
"loss": 0.1694, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.24220374220374222, |
|
"eval_f1_macro": 0.6064603919289959, |
|
"eval_f1_micro": 0.7751430907604253, |
|
"eval_loss": 0.14808295667171478, |
|
"eval_roc_auc": 0.8465518457868458, |
|
"eval_runtime": 438.4341, |
|
"eval_samples_per_second": 6.583, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.24740124740124741, |
|
"eval_f1_macro": 0.6135774018658996, |
|
"eval_f1_micro": 0.7689308343302761, |
|
"eval_loss": 0.14581289887428284, |
|
"eval_roc_auc": 0.8357120666953542, |
|
"eval_runtime": 426.6923, |
|
"eval_samples_per_second": 6.764, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.16500511765480042, |
|
"learning_rate": 0.001, |
|
"loss": 0.17, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.24462924462924462, |
|
"eval_f1_macro": 0.6077297645661711, |
|
"eval_f1_micro": 0.7751325049960902, |
|
"eval_loss": 0.1453842669725418, |
|
"eval_roc_auc": 0.8440532649625113, |
|
"eval_runtime": 431.4145, |
|
"eval_samples_per_second": 6.69, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.24566874566874566, |
|
"eval_f1_macro": 0.6107922701154117, |
|
"eval_f1_micro": 0.7735191637630662, |
|
"eval_loss": 0.14941243827342987, |
|
"eval_roc_auc": 0.849050708300112, |
|
"eval_runtime": 434.9588, |
|
"eval_samples_per_second": 6.635, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.1599486619234085, |
|
"learning_rate": 0.001, |
|
"loss": 0.1685, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.24982674982674982, |
|
"eval_f1_macro": 0.5982833860845571, |
|
"eval_f1_micro": 0.7705324709843182, |
|
"eval_loss": 0.14549985527992249, |
|
"eval_roc_auc": 0.8366026732011344, |
|
"eval_runtime": 434.3329, |
|
"eval_samples_per_second": 6.645, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.2532917532917533, |
|
"eval_f1_macro": 0.6068619458731248, |
|
"eval_f1_micro": 0.7784728768532008, |
|
"eval_loss": 0.14541107416152954, |
|
"eval_roc_auc": 0.8494949988142239, |
|
"eval_runtime": 435.6219, |
|
"eval_samples_per_second": 6.625, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.1950293928384781, |
|
"learning_rate": 0.001, |
|
"loss": 0.1687, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.24532224532224534, |
|
"eval_f1_macro": 0.6145316287096297, |
|
"eval_f1_micro": 0.7746102833519939, |
|
"eval_loss": 0.14657220244407654, |
|
"eval_roc_auc": 0.8460955499587395, |
|
"eval_runtime": 434.8949, |
|
"eval_samples_per_second": 6.636, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.18405263125896454, |
|
"learning_rate": 0.001, |
|
"loss": 0.1679, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.253984753984754, |
|
"eval_f1_macro": 0.6124691593400795, |
|
"eval_f1_micro": 0.777031154551008, |
|
"eval_loss": 0.14459234476089478, |
|
"eval_roc_auc": 0.843919167617255, |
|
"eval_runtime": 440.1591, |
|
"eval_samples_per_second": 6.557, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.24462924462924462, |
|
"eval_f1_macro": 0.6168054796129936, |
|
"eval_f1_micro": 0.7781283769180896, |
|
"eval_loss": 0.1468168944120407, |
|
"eval_roc_auc": 0.8469846407097918, |
|
"eval_runtime": 438.6105, |
|
"eval_samples_per_second": 6.58, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.17146140336990356, |
|
"learning_rate": 0.001, |
|
"loss": 0.168, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.2494802494802495, |
|
"eval_f1_macro": 0.6193343400891848, |
|
"eval_f1_micro": 0.7766880749869814, |
|
"eval_loss": 0.14858707785606384, |
|
"eval_roc_auc": 0.8451765062846143, |
|
"eval_runtime": 434.5802, |
|
"eval_samples_per_second": 6.641, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.24878724878724878, |
|
"eval_f1_macro": 0.6092667253949349, |
|
"eval_f1_micro": 0.7718835224773468, |
|
"eval_loss": 0.14637114107608795, |
|
"eval_roc_auc": 0.8391158347811251, |
|
"eval_runtime": 439.3197, |
|
"eval_samples_per_second": 6.569, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.16876503825187683, |
|
"learning_rate": 0.001, |
|
"loss": 0.169, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.24982674982674982, |
|
"eval_f1_macro": 0.6127183895875491, |
|
"eval_f1_micro": 0.7733602776435442, |
|
"eval_loss": 0.1448281705379486, |
|
"eval_roc_auc": 0.8402195590843876, |
|
"eval_runtime": 437.3035, |
|
"eval_samples_per_second": 6.6, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.25225225225225223, |
|
"eval_f1_macro": 0.6109962510638844, |
|
"eval_f1_micro": 0.7814896880859042, |
|
"eval_loss": 0.1450735628604889, |
|
"eval_roc_auc": 0.8526187412743501, |
|
"eval_runtime": 437.7229, |
|
"eval_samples_per_second": 6.593, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.19475676119327545, |
|
"learning_rate": 0.001, |
|
"loss": 0.167, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.24982674982674982, |
|
"eval_f1_macro": 0.6272196317832909, |
|
"eval_f1_micro": 0.7824146207942057, |
|
"eval_loss": 0.14469724893569946, |
|
"eval_roc_auc": 0.8563424677452759, |
|
"eval_runtime": 435.4486, |
|
"eval_samples_per_second": 6.628, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.25363825363825365, |
|
"eval_f1_macro": 0.6265963634718456, |
|
"eval_f1_micro": 0.7836651178652115, |
|
"eval_loss": 0.14824891090393066, |
|
"eval_roc_auc": 0.853692740688437, |
|
"eval_runtime": 435.8824, |
|
"eval_samples_per_second": 6.621, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.15533967316150665, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1652, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.2616077616077616, |
|
"eval_f1_macro": 0.6323784470247855, |
|
"eval_f1_micro": 0.7833456473553827, |
|
"eval_loss": 0.14141727983951569, |
|
"eval_roc_auc": 0.8483120796798727, |
|
"eval_runtime": 435.7344, |
|
"eval_samples_per_second": 6.623, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.26195426195426197, |
|
"eval_f1_macro": 0.6371841233046203, |
|
"eval_f1_micro": 0.7884351407000686, |
|
"eval_loss": 0.13979895412921906, |
|
"eval_roc_auc": 0.8545567611245666, |
|
"eval_runtime": 438.4508, |
|
"eval_samples_per_second": 6.582, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.1733330935239792, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1608, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.26403326403326405, |
|
"eval_f1_macro": 0.6366820358518588, |
|
"eval_f1_micro": 0.7871061893724783, |
|
"eval_loss": 0.14107641577720642, |
|
"eval_roc_auc": 0.853678548931782, |
|
"eval_runtime": 434.1211, |
|
"eval_samples_per_second": 6.648, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.19694675505161285, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1596, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.26126126126126126, |
|
"eval_f1_macro": 0.6256922069455233, |
|
"eval_f1_micro": 0.787878787878788, |
|
"eval_loss": 0.13898694515228271, |
|
"eval_roc_auc": 0.8537086091649239, |
|
"eval_runtime": 434.0073, |
|
"eval_samples_per_second": 6.65, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.2664587664587665, |
|
"eval_f1_macro": 0.6421056073559387, |
|
"eval_f1_micro": 0.7894011202068074, |
|
"eval_loss": 0.13859130442142487, |
|
"eval_roc_auc": 0.8538817942028954, |
|
"eval_runtime": 432.4865, |
|
"eval_samples_per_second": 6.673, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.18810147047042847, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1582, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2664587664587665, |
|
"eval_f1_macro": 0.6283048537279357, |
|
"eval_f1_micro": 0.7873893327575039, |
|
"eval_loss": 0.139601469039917, |
|
"eval_roc_auc": 0.8521625527563127, |
|
"eval_runtime": 421.9429, |
|
"eval_samples_per_second": 6.84, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2636867636867637, |
|
"eval_f1_macro": 0.6286555138094179, |
|
"eval_f1_micro": 0.7863567238757333, |
|
"eval_loss": 0.13869330286979675, |
|
"eval_roc_auc": 0.8499808451526433, |
|
"eval_runtime": 424.0306, |
|
"eval_samples_per_second": 6.806, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.15351006388664246, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1584, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.26784476784476785, |
|
"eval_f1_macro": 0.6334934953582803, |
|
"eval_f1_micro": 0.7913177234660741, |
|
"eval_loss": 0.13777127861976624, |
|
"eval_roc_auc": 0.8571892112602602, |
|
"eval_runtime": 419.9652, |
|
"eval_samples_per_second": 6.872, |
|
"eval_steps_per_second": 0.217, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.26403326403326405, |
|
"eval_f1_macro": 0.6381777921693204, |
|
"eval_f1_micro": 0.7933989479042932, |
|
"eval_loss": 0.1377096027135849, |
|
"eval_roc_auc": 0.8602965218660363, |
|
"eval_runtime": 431.2306, |
|
"eval_samples_per_second": 6.692, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.1798904836177826, |
|
"learning_rate": 0.0001, |
|
"loss": 0.157, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2674982674982675, |
|
"eval_f1_macro": 0.6362718007605523, |
|
"eval_f1_micro": 0.7918342891380639, |
|
"eval_loss": 0.13755330443382263, |
|
"eval_roc_auc": 0.8570210161405075, |
|
"eval_runtime": 429.5809, |
|
"eval_samples_per_second": 6.718, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2661122661122661, |
|
"eval_f1_macro": 0.6426825970872383, |
|
"eval_f1_micro": 0.7928808087673094, |
|
"eval_loss": 0.13754987716674805, |
|
"eval_roc_auc": 0.8596608706709776, |
|
"eval_runtime": 429.3766, |
|
"eval_samples_per_second": 6.721, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.20376506447792053, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1567, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.26576576576576577, |
|
"eval_f1_macro": 0.6367912909960436, |
|
"eval_f1_micro": 0.7871186146434616, |
|
"eval_loss": 0.13771678507328033, |
|
"eval_roc_auc": 0.8506886757830149, |
|
"eval_runtime": 424.3804, |
|
"eval_samples_per_second": 6.801, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_f1_macro": 0.640555047060403, |
|
"eval_f1_micro": 0.7928592630284527, |
|
"eval_loss": 0.13740690052509308, |
|
"eval_roc_auc": 0.8601326459765699, |
|
"eval_runtime": 434.4832, |
|
"eval_samples_per_second": 6.642, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.16348811984062195, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1571, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.27165627165627165, |
|
"eval_f1_macro": 0.6412320555565514, |
|
"eval_f1_micro": 0.7920979171140219, |
|
"eval_loss": 0.1368684023618698, |
|
"eval_roc_auc": 0.8562094300869534, |
|
"eval_runtime": 425.2932, |
|
"eval_samples_per_second": 6.786, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.20431332290172577, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1548, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.2702702702702703, |
|
"eval_f1_macro": 0.6377616721633446, |
|
"eval_f1_micro": 0.7914089347079037, |
|
"eval_loss": 0.13703426718711853, |
|
"eval_roc_auc": 0.8557803910164303, |
|
"eval_runtime": 424.9893, |
|
"eval_samples_per_second": 6.791, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.2643797643797644, |
|
"eval_f1_macro": 0.6425003998141597, |
|
"eval_f1_micro": 0.7931107623128156, |
|
"eval_loss": 0.1364637017250061, |
|
"eval_roc_auc": 0.8601515459625123, |
|
"eval_runtime": 423.7139, |
|
"eval_samples_per_second": 6.811, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.19714656472206116, |
|
"learning_rate": 0.0001, |
|
"loss": 0.155, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.2674982674982675, |
|
"eval_f1_macro": 0.6381793578718891, |
|
"eval_f1_micro": 0.7926408585665006, |
|
"eval_loss": 0.13675515353679657, |
|
"eval_roc_auc": 0.8588114846455387, |
|
"eval_runtime": 426.4919, |
|
"eval_samples_per_second": 6.767, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.2674982674982675, |
|
"eval_f1_macro": 0.637380953089336, |
|
"eval_f1_micro": 0.791562634524322, |
|
"eval_loss": 0.1364695280790329, |
|
"eval_roc_auc": 0.855274853280308, |
|
"eval_runtime": 425.8426, |
|
"eval_samples_per_second": 6.777, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.19042669236660004, |
|
"learning_rate": 0.0001, |
|
"loss": 0.155, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.2674982674982675, |
|
"eval_f1_macro": 0.6428884521567982, |
|
"eval_f1_micro": 0.7922245108135942, |
|
"eval_loss": 0.13641765713691711, |
|
"eval_roc_auc": 0.8565012329926954, |
|
"eval_runtime": 423.8693, |
|
"eval_samples_per_second": 6.809, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.26507276507276506, |
|
"eval_f1_macro": 0.6357999016219877, |
|
"eval_f1_micro": 0.7882888744307093, |
|
"eval_loss": 0.13687649369239807, |
|
"eval_roc_auc": 0.8514745744887481, |
|
"eval_runtime": 423.4928, |
|
"eval_samples_per_second": 6.815, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.18568764626979828, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1546, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.2713097713097713, |
|
"eval_f1_macro": 0.6503848519713329, |
|
"eval_f1_micro": 0.7945638702508654, |
|
"eval_loss": 0.13638463616371155, |
|
"eval_roc_auc": 0.8588833823919201, |
|
"eval_runtime": 425.9119, |
|
"eval_samples_per_second": 6.776, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.2751212751212751, |
|
"eval_f1_macro": 0.6441767594174573, |
|
"eval_f1_micro": 0.7931640039405492, |
|
"eval_loss": 0.13563227653503418, |
|
"eval_roc_auc": 0.8575138778747027, |
|
"eval_runtime": 422.0661, |
|
"eval_samples_per_second": 6.838, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.0001, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.19402863085269928, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1536, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.27373527373527373, |
|
"eval_f1_macro": 0.6515952055035917, |
|
"eval_f1_micro": 0.7966116124638174, |
|
"eval_loss": 0.1355270892381668, |
|
"eval_roc_auc": 0.8610939161629354, |
|
"eval_runtime": 426.9279, |
|
"eval_samples_per_second": 6.76, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.26784476784476785, |
|
"eval_f1_macro": 0.6450040026439422, |
|
"eval_f1_micro": 0.7934075342465754, |
|
"eval_loss": 0.13592010736465454, |
|
"eval_roc_auc": 0.8577985580745997, |
|
"eval_runtime": 426.0816, |
|
"eval_samples_per_second": 6.773, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.22000150382518768, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1544, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.27061677061677064, |
|
"eval_f1_macro": 0.64551501310817, |
|
"eval_f1_micro": 0.7936467053015668, |
|
"eval_loss": 0.13569533824920654, |
|
"eval_roc_auc": 0.857159821715051, |
|
"eval_runtime": 424.6551, |
|
"eval_samples_per_second": 6.796, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.19799016416072845, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1529, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.2713097713097713, |
|
"eval_f1_macro": 0.6477176853690674, |
|
"eval_f1_micro": 0.794643237940888, |
|
"eval_loss": 0.13565082848072052, |
|
"eval_roc_auc": 0.8594942449609874, |
|
"eval_runtime": 425.0795, |
|
"eval_samples_per_second": 6.789, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6544361257862924, |
|
"eval_f1_micro": 0.7965922095536813, |
|
"eval_loss": 0.13533934950828552, |
|
"eval_roc_auc": 0.8622831129363361, |
|
"eval_runtime": 424.6762, |
|
"eval_samples_per_second": 6.796, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.2619948983192444, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1528, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.2733887733887734, |
|
"eval_f1_macro": 0.6519486064773884, |
|
"eval_f1_micro": 0.7955772910907932, |
|
"eval_loss": 0.1353396475315094, |
|
"eval_roc_auc": 0.8608058154545816, |
|
"eval_runtime": 421.8067, |
|
"eval_samples_per_second": 6.842, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.0001, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.26992376992376993, |
|
"eval_f1_macro": 0.6515714856354324, |
|
"eval_f1_micro": 0.7966188524590164, |
|
"eval_loss": 0.13474246859550476, |
|
"eval_roc_auc": 0.8602900698481241, |
|
"eval_runtime": 423.2901, |
|
"eval_samples_per_second": 6.818, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.18048201501369476, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1528, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.272002772002772, |
|
"eval_f1_macro": 0.6441608871918139, |
|
"eval_f1_micro": 0.7944687795241776, |
|
"eval_loss": 0.13504748046398163, |
|
"eval_roc_auc": 0.8574953132327267, |
|
"eval_runtime": 423.3844, |
|
"eval_samples_per_second": 6.817, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.27234927234927236, |
|
"eval_f1_macro": 0.6441889860402124, |
|
"eval_f1_micro": 0.7933057280883367, |
|
"eval_loss": 0.13502468168735504, |
|
"eval_roc_auc": 0.8556664277229126, |
|
"eval_runtime": 422.6912, |
|
"eval_samples_per_second": 6.828, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.24162879586219788, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1522, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.2758142758142758, |
|
"eval_f1_macro": 0.6484748365424647, |
|
"eval_f1_micro": 0.7969950486597234, |
|
"eval_loss": 0.1344645917415619, |
|
"eval_roc_auc": 0.8605409876174911, |
|
"eval_runtime": 426.5755, |
|
"eval_samples_per_second": 6.766, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.27616077616077617, |
|
"eval_f1_macro": 0.6518769914193778, |
|
"eval_f1_micro": 0.7977006599957419, |
|
"eval_loss": 0.1341526359319687, |
|
"eval_roc_auc": 0.8616010233088203, |
|
"eval_runtime": 420.7226, |
|
"eval_samples_per_second": 6.86, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.0001, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.22451983392238617, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1523, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.2751212751212751, |
|
"eval_f1_macro": 0.641334935505441, |
|
"eval_f1_micro": 0.7914797229603171, |
|
"eval_loss": 0.13499116897583008, |
|
"eval_roc_auc": 0.8520198169504839, |
|
"eval_runtime": 428.7922, |
|
"eval_samples_per_second": 6.731, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.0001, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.2751212751212751, |
|
"eval_f1_macro": 0.6485229770180625, |
|
"eval_f1_micro": 0.7946678133734681, |
|
"eval_loss": 0.13461369276046753, |
|
"eval_roc_auc": 0.8572354216588205, |
|
"eval_runtime": 427.8784, |
|
"eval_samples_per_second": 6.745, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.22029711306095123, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1521, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.2758142758142758, |
|
"eval_f1_macro": 0.6478195810395848, |
|
"eval_f1_micro": 0.7964594201659113, |
|
"eval_loss": 0.13438266515731812, |
|
"eval_roc_auc": 0.8597526207801657, |
|
"eval_runtime": 424.3142, |
|
"eval_samples_per_second": 6.802, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.2415299415588379, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1515, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.27754677754677753, |
|
"eval_f1_macro": 0.6536737916153181, |
|
"eval_f1_micro": 0.7977742853502102, |
|
"eval_loss": 0.13460540771484375, |
|
"eval_roc_auc": 0.8623314561225224, |
|
"eval_runtime": 422.8083, |
|
"eval_samples_per_second": 6.826, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.27754677754677753, |
|
"eval_f1_macro": 0.6543115985953537, |
|
"eval_f1_micro": 0.7978169818504888, |
|
"eval_loss": 0.13411369919776917, |
|
"eval_roc_auc": 0.8634738791194995, |
|
"eval_runtime": 428.5067, |
|
"eval_samples_per_second": 6.735, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.0001, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.2636328637599945, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1514, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.2740817740817741, |
|
"eval_f1_macro": 0.6523004018612216, |
|
"eval_f1_micro": 0.7953020134228188, |
|
"eval_loss": 0.13399606943130493, |
|
"eval_roc_auc": 0.8574454542918126, |
|
"eval_runtime": 436.7976, |
|
"eval_samples_per_second": 6.607, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.27823977823977825, |
|
"eval_f1_macro": 0.6545582038870168, |
|
"eval_f1_micro": 0.7993085420355848, |
|
"eval_loss": 0.1344238668680191, |
|
"eval_roc_auc": 0.8652547567870936, |
|
"eval_runtime": 431.9941, |
|
"eval_samples_per_second": 6.681, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.23601791262626648, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1516, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.2758142758142758, |
|
"eval_f1_macro": 0.6559691700651434, |
|
"eval_f1_micro": 0.7966715529878418, |
|
"eval_loss": 0.13405664265155792, |
|
"eval_roc_auc": 0.8575861109650502, |
|
"eval_runtime": 436.6356, |
|
"eval_samples_per_second": 6.61, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.2765072765072765, |
|
"eval_f1_macro": 0.6453669674995801, |
|
"eval_f1_micro": 0.7947541551246537, |
|
"eval_loss": 0.13407430052757263, |
|
"eval_roc_auc": 0.8554945304057716, |
|
"eval_runtime": 436.5794, |
|
"eval_samples_per_second": 6.61, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.19588124752044678, |
|
"learning_rate": 0.0001, |
|
"loss": 0.149, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.2702702702702703, |
|
"eval_f1_macro": 0.645966570658811, |
|
"eval_f1_micro": 0.7924365020985678, |
|
"eval_loss": 0.1350804716348648, |
|
"eval_roc_auc": 0.8543412288505268, |
|
"eval_runtime": 433.6987, |
|
"eval_samples_per_second": 6.654, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6512285101875886, |
|
"eval_f1_micro": 0.7957293542577825, |
|
"eval_loss": 0.13387472927570343, |
|
"eval_roc_auc": 0.8585996545688873, |
|
"eval_runtime": 432.4386, |
|
"eval_samples_per_second": 6.674, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.2560372054576874, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1515, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.27927927927927926, |
|
"eval_f1_macro": 0.6531817491521362, |
|
"eval_f1_micro": 0.7990622335890879, |
|
"eval_loss": 0.13341927528381348, |
|
"eval_roc_auc": 0.8620406055936447, |
|
"eval_runtime": 432.3488, |
|
"eval_samples_per_second": 6.675, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.2747747747747748, |
|
"eval_f1_macro": 0.6595866427349153, |
|
"eval_f1_micro": 0.7988261313371896, |
|
"eval_loss": 0.13337253034114838, |
|
"eval_roc_auc": 0.8625331319838734, |
|
"eval_runtime": 435.2436, |
|
"eval_samples_per_second": 6.631, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.28640052676200867, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1495, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.27442827442827444, |
|
"eval_f1_macro": 0.6467323251879672, |
|
"eval_f1_micro": 0.7956179390619651, |
|
"eval_loss": 0.1339845359325409, |
|
"eval_roc_auc": 0.8590850582532711, |
|
"eval_runtime": 438.7375, |
|
"eval_samples_per_second": 6.578, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.23546907305717468, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1496, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.2747747747747748, |
|
"eval_f1_macro": 0.648318545746826, |
|
"eval_f1_micro": 0.7981612326551459, |
|
"eval_loss": 0.13357459008693695, |
|
"eval_roc_auc": 0.8619578829440303, |
|
"eval_runtime": 432.3449, |
|
"eval_samples_per_second": 6.675, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.2806652806652807, |
|
"eval_f1_macro": 0.6585340844298272, |
|
"eval_f1_micro": 0.8014968675104065, |
|
"eval_loss": 0.13366733491420746, |
|
"eval_roc_auc": 0.8672320387088881, |
|
"eval_runtime": 431.6296, |
|
"eval_samples_per_second": 6.686, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.24246211349964142, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1493, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.2772002772002772, |
|
"eval_f1_macro": 0.66211749340029, |
|
"eval_f1_micro": 0.8010798042854732, |
|
"eval_loss": 0.1332736760377884, |
|
"eval_roc_auc": 0.8661044781564988, |
|
"eval_runtime": 425.5723, |
|
"eval_samples_per_second": 6.781, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.27823977823977825, |
|
"eval_f1_macro": 0.6528573832362276, |
|
"eval_f1_micro": 0.7956933454403943, |
|
"eval_loss": 0.13367226719856262, |
|
"eval_roc_auc": 0.8562680347985093, |
|
"eval_runtime": 443.8961, |
|
"eval_samples_per_second": 6.502, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.57509157509158, |
|
"grad_norm": 0.22026851773262024, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1496, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6513649424471982, |
|
"eval_f1_micro": 0.796086375587259, |
|
"eval_loss": 0.13348612189292908, |
|
"eval_roc_auc": 0.8573559442803198, |
|
"eval_runtime": 443.9031, |
|
"eval_samples_per_second": 6.501, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.2758142758142758, |
|
"eval_f1_macro": 0.6559763883082907, |
|
"eval_f1_micro": 0.8001861094662043, |
|
"eval_loss": 0.1330718696117401, |
|
"eval_roc_auc": 0.8648260530605368, |
|
"eval_runtime": 436.5725, |
|
"eval_samples_per_second": 6.611, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 93.4065934065934, |
|
"grad_norm": 0.28630152344703674, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1493, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.2758142758142758, |
|
"eval_f1_macro": 0.6553585917255438, |
|
"eval_f1_micro": 0.7995090362720617, |
|
"eval_loss": 0.13329002261161804, |
|
"eval_roc_auc": 0.864277443745379, |
|
"eval_runtime": 442.8808, |
|
"eval_samples_per_second": 6.516, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 25662 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.2758142758142758, |
|
"eval_f1_macro": 0.6579543710907207, |
|
"eval_f1_micro": 0.7979651162790697, |
|
"eval_loss": 0.13314621150493622, |
|
"eval_roc_auc": 0.8606367216129991, |
|
"eval_runtime": 436.3942, |
|
"eval_samples_per_second": 6.613, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 25935 |
|
}, |
|
{ |
|
"epoch": 95.23809523809524, |
|
"grad_norm": 0.25194719433784485, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1482, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.2751212751212751, |
|
"eval_f1_macro": 0.6556445954379041, |
|
"eval_f1_micro": 0.7992523999660183, |
|
"eval_loss": 0.13279949128627777, |
|
"eval_roc_auc": 0.8631226264354063, |
|
"eval_runtime": 426.8086, |
|
"eval_samples_per_second": 6.762, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 26208 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.27823977823977825, |
|
"eval_f1_macro": 0.6492741904723621, |
|
"eval_f1_micro": 0.7977296181630549, |
|
"eval_loss": 0.1332886964082718, |
|
"eval_roc_auc": 0.8588905587527994, |
|
"eval_runtime": 441.9848, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 26481 |
|
}, |
|
{ |
|
"epoch": 97.06959706959707, |
|
"grad_norm": 0.27280953526496887, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1497, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6600105762308898, |
|
"eval_f1_micro": 0.799611141637432, |
|
"eval_loss": 0.13266970217227936, |
|
"eval_roc_auc": 0.864715456620441, |
|
"eval_runtime": 439.781, |
|
"eval_samples_per_second": 6.562, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 26754 |
|
}, |
|
{ |
|
"epoch": 98.9010989010989, |
|
"grad_norm": 0.30599892139434814, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1489, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.27165627165627165, |
|
"eval_f1_macro": 0.6589970862385839, |
|
"eval_f1_micro": 0.7978809757764771, |
|
"eval_loss": 0.13253149390220642, |
|
"eval_roc_auc": 0.8607699202364255, |
|
"eval_runtime": 438.5456, |
|
"eval_samples_per_second": 6.581, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 27027 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.27616077616077617, |
|
"eval_f1_macro": 0.6570195655430786, |
|
"eval_f1_micro": 0.797143840330351, |
|
"eval_loss": 0.1329408884048462, |
|
"eval_roc_auc": 0.8584810367011169, |
|
"eval_runtime": 434.9771, |
|
"eval_samples_per_second": 6.635, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 100.73260073260073, |
|
"grad_norm": 0.2732805013656616, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1482, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.28205128205128205, |
|
"eval_f1_macro": 0.657951499975745, |
|
"eval_f1_micro": 0.7991615690636095, |
|
"eval_loss": 0.13274870812892914, |
|
"eval_roc_auc": 0.861103560655407, |
|
"eval_runtime": 435.4493, |
|
"eval_samples_per_second": 6.628, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.2817047817047817, |
|
"eval_f1_macro": 0.654306822863844, |
|
"eval_f1_micro": 0.7986821274228745, |
|
"eval_loss": 0.1326293796300888, |
|
"eval_roc_auc": 0.8607733407448822, |
|
"eval_runtime": 437.9645, |
|
"eval_samples_per_second": 6.59, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 27846 |
|
}, |
|
{ |
|
"epoch": 102.56410256410257, |
|
"grad_norm": 0.23533137142658234, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1474, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.2803187803187803, |
|
"eval_f1_macro": 0.6518495856500403, |
|
"eval_f1_micro": 0.7993688968487486, |
|
"eval_loss": 0.13247379660606384, |
|
"eval_roc_auc": 0.8620991566501659, |
|
"eval_runtime": 426.0566, |
|
"eval_samples_per_second": 6.774, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 28119 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.27754677754677753, |
|
"eval_f1_macro": 0.6612536009112525, |
|
"eval_f1_micro": 0.8010850676047981, |
|
"eval_loss": 0.13315415382385254, |
|
"eval_roc_auc": 0.864729420343199, |
|
"eval_runtime": 425.2679, |
|
"eval_samples_per_second": 6.786, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 28392 |
|
}, |
|
{ |
|
"epoch": 104.3956043956044, |
|
"grad_norm": 0.2809629738330841, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1472, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.2830907830907831, |
|
"eval_f1_macro": 0.6635718544409769, |
|
"eval_f1_micro": 0.8012698412698412, |
|
"eval_loss": 0.13218620419502258, |
|
"eval_roc_auc": 0.8652135899617869, |
|
"eval_runtime": 425.1586, |
|
"eval_samples_per_second": 6.788, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 28665 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.2830907830907831, |
|
"eval_f1_macro": 0.6588128942023547, |
|
"eval_f1_micro": 0.800988243312319, |
|
"eval_loss": 0.13239973783493042, |
|
"eval_roc_auc": 0.8632750603887415, |
|
"eval_runtime": 427.5404, |
|
"eval_samples_per_second": 6.75, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 28938 |
|
}, |
|
{ |
|
"epoch": 106.22710622710623, |
|
"grad_norm": 0.2568123936653137, |
|
"learning_rate": 0.0001, |
|
"loss": 0.148, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.2785862785862786, |
|
"eval_f1_macro": 0.650564106362156, |
|
"eval_f1_micro": 0.7985513421389007, |
|
"eval_loss": 0.13358280062675476, |
|
"eval_roc_auc": 0.8618832353771251, |
|
"eval_runtime": 425.2874, |
|
"eval_samples_per_second": 6.786, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 29211 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.2796257796257796, |
|
"eval_f1_macro": 0.6501303094783896, |
|
"eval_f1_micro": 0.7995554225623049, |
|
"eval_loss": 0.13270235061645508, |
|
"eval_roc_auc": 0.8615071940670409, |
|
"eval_runtime": 432.9179, |
|
"eval_samples_per_second": 6.666, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 29484 |
|
}, |
|
{ |
|
"epoch": 108.05860805860806, |
|
"grad_norm": 0.29480934143066406, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1477, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.2806652806652807, |
|
"eval_f1_macro": 0.6579556871315007, |
|
"eval_f1_micro": 0.8000342553738118, |
|
"eval_loss": 0.1318453699350357, |
|
"eval_roc_auc": 0.8612993478767093, |
|
"eval_runtime": 434.6895, |
|
"eval_samples_per_second": 6.639, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 29757 |
|
}, |
|
{ |
|
"epoch": 109.89010989010988, |
|
"grad_norm": 0.3718918561935425, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1479, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.2803187803187803, |
|
"eval_f1_macro": 0.6582487839550253, |
|
"eval_f1_micro": 0.7997274043785672, |
|
"eval_loss": 0.13255637884140015, |
|
"eval_roc_auc": 0.8626158546334878, |
|
"eval_runtime": 427.7015, |
|
"eval_samples_per_second": 6.748, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 0.0001, |
|
"step": 30030 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.2785862785862786, |
|
"eval_f1_macro": 0.6608614747058748, |
|
"eval_f1_micro": 0.8012935069355799, |
|
"eval_loss": 0.1319260448217392, |
|
"eval_roc_auc": 0.8637521073014844, |
|
"eval_runtime": 422.4227, |
|
"eval_samples_per_second": 6.832, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 30303 |
|
}, |
|
{ |
|
"epoch": 111.72161172161172, |
|
"grad_norm": 0.3544025719165802, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1466, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.28101178101178104, |
|
"eval_f1_macro": 0.6595016342799644, |
|
"eval_f1_micro": 0.8019278738426415, |
|
"eval_loss": 0.13223350048065186, |
|
"eval_roc_auc": 0.8659084092462648, |
|
"eval_runtime": 420.8235, |
|
"eval_samples_per_second": 6.858, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.0001, |
|
"step": 30576 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.27997227997227997, |
|
"eval_f1_macro": 0.6592029124671744, |
|
"eval_f1_micro": 0.8024988392216453, |
|
"eval_loss": 0.13213913142681122, |
|
"eval_roc_auc": 0.8666766420318518, |
|
"eval_runtime": 423.8949, |
|
"eval_samples_per_second": 6.808, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.0001, |
|
"step": 30849 |
|
}, |
|
{ |
|
"epoch": 113.55311355311355, |
|
"grad_norm": 0.35069116950035095, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1474, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.2823977823977824, |
|
"eval_f1_macro": 0.663088095209859, |
|
"eval_f1_micro": 0.8025030654094965, |
|
"eval_loss": 0.13204564154148102, |
|
"eval_roc_auc": 0.8661983610533127, |
|
"eval_runtime": 421.2287, |
|
"eval_samples_per_second": 6.851, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.0001, |
|
"step": 31122 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.28378378378378377, |
|
"eval_f1_macro": 0.659797224924612, |
|
"eval_f1_micro": 0.8004266211604096, |
|
"eval_loss": 0.1319342404603958, |
|
"eval_roc_auc": 0.8625399730007867, |
|
"eval_runtime": 424.6871, |
|
"eval_samples_per_second": 6.796, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 0.0001, |
|
"step": 31395 |
|
}, |
|
{ |
|
"epoch": 115.38461538461539, |
|
"grad_norm": 0.29624369740486145, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1468, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.2844767844767845, |
|
"eval_f1_macro": 0.6627361818946377, |
|
"eval_f1_micro": 0.8022295974810655, |
|
"eval_loss": 0.13186337053775787, |
|
"eval_roc_auc": 0.8642598314802673, |
|
"eval_runtime": 423.8673, |
|
"eval_samples_per_second": 6.809, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 1e-05, |
|
"step": 31668 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.28205128205128205, |
|
"eval_f1_macro": 0.6604165936303265, |
|
"eval_f1_micro": 0.8012607547491268, |
|
"eval_loss": 0.1317850947380066, |
|
"eval_roc_auc": 0.8634466760169507, |
|
"eval_runtime": 419.012, |
|
"eval_samples_per_second": 6.888, |
|
"eval_steps_per_second": 0.217, |
|
"learning_rate": 1e-05, |
|
"step": 31941 |
|
}, |
|
{ |
|
"epoch": 117.21611721611721, |
|
"grad_norm": 0.28633400797843933, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1455, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.2796257796257796, |
|
"eval_f1_macro": 0.6590147410119703, |
|
"eval_f1_micro": 0.8002395926924228, |
|
"eval_loss": 0.13159342110157013, |
|
"eval_roc_auc": 0.8616373075259771, |
|
"eval_runtime": 419.8006, |
|
"eval_samples_per_second": 6.875, |
|
"eval_steps_per_second": 0.217, |
|
"learning_rate": 1e-05, |
|
"step": 32214 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.28274428274428276, |
|
"eval_f1_macro": 0.6608406822787987, |
|
"eval_f1_micro": 0.8036745185622182, |
|
"eval_loss": 0.1319129317998886, |
|
"eval_roc_auc": 0.8678011174197509, |
|
"eval_runtime": 423.7674, |
|
"eval_samples_per_second": 6.81, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 1e-05, |
|
"step": 32487 |
|
}, |
|
{ |
|
"epoch": 119.04761904761905, |
|
"grad_norm": 0.31120315194129944, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1451, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.28135828135828134, |
|
"eval_f1_macro": 0.6614581971670047, |
|
"eval_f1_micro": 0.803593372600534, |
|
"eval_loss": 0.13164088129997253, |
|
"eval_roc_auc": 0.8661674020983411, |
|
"eval_runtime": 420.709, |
|
"eval_samples_per_second": 6.86, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 1e-05, |
|
"step": 32760 |
|
}, |
|
{ |
|
"epoch": 120.87912087912088, |
|
"grad_norm": 0.31770700216293335, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1454, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.28101178101178104, |
|
"eval_f1_macro": 0.6610641151618838, |
|
"eval_f1_micro": 0.8012604863092451, |
|
"eval_loss": 0.13184630870819092, |
|
"eval_roc_auc": 0.8635064611392681, |
|
"eval_runtime": 422.0264, |
|
"eval_samples_per_second": 6.838, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 1e-05, |
|
"step": 33033 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.2817047817047817, |
|
"eval_f1_macro": 0.6647378818356079, |
|
"eval_f1_micro": 0.8049611099432415, |
|
"eval_loss": 0.13215216994285583, |
|
"eval_roc_auc": 0.8691576105910745, |
|
"eval_runtime": 436.9114, |
|
"eval_samples_per_second": 6.605, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 1e-05, |
|
"step": 33306 |
|
}, |
|
{ |
|
"epoch": 122.71062271062272, |
|
"grad_norm": 0.22290275990962982, |
|
"learning_rate": 1e-05, |
|
"loss": 0.145, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.2817047817047817, |
|
"eval_f1_macro": 0.6604978306251739, |
|
"eval_f1_micro": 0.8010107932156931, |
|
"eval_loss": 0.13187836110591888, |
|
"eval_roc_auc": 0.8617537926061216, |
|
"eval_runtime": 431.3938, |
|
"eval_samples_per_second": 6.69, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1e-05, |
|
"step": 33579 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.2806652806652807, |
|
"eval_f1_macro": 0.6621515776947642, |
|
"eval_f1_micro": 0.8018739352640545, |
|
"eval_loss": 0.13141389191150665, |
|
"eval_roc_auc": 0.8638029186192627, |
|
"eval_runtime": 430.2675, |
|
"eval_samples_per_second": 6.707, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1e-05, |
|
"step": 33852 |
|
}, |
|
{ |
|
"epoch": 124.54212454212454, |
|
"grad_norm": 0.27631625533103943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1459, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.2862092862092862, |
|
"eval_f1_macro": 0.6640721616133445, |
|
"eval_f1_micro": 0.804345987993574, |
|
"eval_loss": 0.13139639794826508, |
|
"eval_roc_auc": 0.8672404491355638, |
|
"eval_runtime": 432.0509, |
|
"eval_samples_per_second": 6.68, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1e-05, |
|
"step": 34125 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.2862092862092862, |
|
"eval_f1_macro": 0.663003919720051, |
|
"eval_f1_micro": 0.804212663367593, |
|
"eval_loss": 0.13103623688220978, |
|
"eval_roc_auc": 0.8670350710768244, |
|
"eval_runtime": 432.4499, |
|
"eval_samples_per_second": 6.674, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 34398 |
|
}, |
|
{ |
|
"epoch": 126.37362637362638, |
|
"grad_norm": 0.3177105188369751, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1439, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.28586278586278585, |
|
"eval_f1_macro": 0.6597731906072118, |
|
"eval_f1_micro": 0.8038346213944846, |
|
"eval_loss": 0.13152988255023956, |
|
"eval_roc_auc": 0.8672624342859965, |
|
"eval_runtime": 431.3827, |
|
"eval_samples_per_second": 6.69, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1e-05, |
|
"step": 34671 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.2869022869022869, |
|
"eval_f1_macro": 0.668197478893632, |
|
"eval_f1_micro": 0.8042412977357216, |
|
"eval_loss": 0.13113313913345337, |
|
"eval_roc_auc": 0.8674002874836755, |
|
"eval_runtime": 439.4627, |
|
"eval_samples_per_second": 6.567, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 1e-05, |
|
"step": 34944 |
|
}, |
|
{ |
|
"epoch": 128.2051282051282, |
|
"grad_norm": 0.2520149350166321, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1446, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.28274428274428276, |
|
"eval_f1_macro": 0.6652814888251478, |
|
"eval_f1_micro": 0.8034694309287074, |
|
"eval_loss": 0.13096605241298676, |
|
"eval_roc_auc": 0.8665332355380903, |
|
"eval_runtime": 443.7844, |
|
"eval_samples_per_second": 6.503, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 1e-05, |
|
"step": 35217 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.28655578655578656, |
|
"eval_f1_macro": 0.6657375892895663, |
|
"eval_f1_micro": 0.8034491503931017, |
|
"eval_loss": 0.1310083270072937, |
|
"eval_roc_auc": 0.866799015752045, |
|
"eval_runtime": 440.6588, |
|
"eval_samples_per_second": 6.549, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 1e-05, |
|
"step": 35490 |
|
}, |
|
{ |
|
"epoch": 130.03663003663004, |
|
"grad_norm": 0.2916598916053772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1449, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.2834372834372834, |
|
"eval_f1_macro": 0.6709132204127336, |
|
"eval_f1_micro": 0.8052362171687506, |
|
"eval_loss": 0.13133247196674347, |
|
"eval_roc_auc": 0.8699004377177725, |
|
"eval_runtime": 446.7612, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 1e-05, |
|
"step": 35763 |
|
}, |
|
{ |
|
"epoch": 131.86813186813185, |
|
"grad_norm": 0.3473760783672333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1442, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.2806652806652807, |
|
"eval_f1_macro": 0.6557913726655867, |
|
"eval_f1_micro": 0.7985562048814026, |
|
"eval_loss": 0.13149647414684296, |
|
"eval_roc_auc": 0.8595249758820619, |
|
"eval_runtime": 447.0484, |
|
"eval_samples_per_second": 6.456, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 1e-05, |
|
"step": 36036 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.28794178794178793, |
|
"eval_f1_macro": 0.6689392948255155, |
|
"eval_f1_micro": 0.8051816958277256, |
|
"eval_loss": 0.1311328113079071, |
|
"eval_roc_auc": 0.8691700049040701, |
|
"eval_runtime": 444.1217, |
|
"eval_samples_per_second": 6.498, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 1e-05, |
|
"step": 36309 |
|
}, |
|
{ |
|
"epoch": 133.6996336996337, |
|
"grad_norm": 0.2959079444408417, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1443, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.28274428274428276, |
|
"eval_f1_macro": 0.6648386499372343, |
|
"eval_f1_micro": 0.802060714437774, |
|
"eval_loss": 0.1308571696281433, |
|
"eval_roc_auc": 0.8639881626262637, |
|
"eval_runtime": 444.917, |
|
"eval_samples_per_second": 6.487, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 1e-05, |
|
"step": 36582 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.2869022869022869, |
|
"eval_f1_macro": 0.6684163123065296, |
|
"eval_f1_micro": 0.8038277511961722, |
|
"eval_loss": 0.13148072361946106, |
|
"eval_roc_auc": 0.8665118674205556, |
|
"eval_runtime": 437.5153, |
|
"eval_samples_per_second": 6.596, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 1e-05, |
|
"step": 36855 |
|
}, |
|
{ |
|
"epoch": 135.53113553113553, |
|
"grad_norm": 0.3723543882369995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1438, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.28274428274428276, |
|
"eval_f1_macro": 0.659009971789042, |
|
"eval_f1_micro": 0.8024591213764248, |
|
"eval_loss": 0.13150115311145782, |
|
"eval_roc_auc": 0.8634352340808195, |
|
"eval_runtime": 444.5109, |
|
"eval_samples_per_second": 6.493, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 1e-05, |
|
"step": 37128 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.28586278586278585, |
|
"eval_f1_macro": 0.6666808903899752, |
|
"eval_f1_micro": 0.8035592643051771, |
|
"eval_loss": 0.1310679018497467, |
|
"eval_roc_auc": 0.8648124783367798, |
|
"eval_runtime": 434.2661, |
|
"eval_samples_per_second": 6.646, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 37401 |
|
}, |
|
{ |
|
"epoch": 137.36263736263737, |
|
"grad_norm": 0.36766815185546875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1452, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.2844767844767845, |
|
"eval_f1_macro": 0.6665598962110765, |
|
"eval_f1_micro": 0.8035426731078905, |
|
"eval_loss": 0.13124705851078033, |
|
"eval_roc_auc": 0.8661277510277622, |
|
"eval_runtime": 434.1413, |
|
"eval_samples_per_second": 6.648, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 37674 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.28967428967428965, |
|
"eval_f1_macro": 0.6661043989752415, |
|
"eval_f1_micro": 0.8052538519828238, |
|
"eval_loss": 0.13104070723056793, |
|
"eval_roc_auc": 0.8689438757606943, |
|
"eval_runtime": 433.2581, |
|
"eval_samples_per_second": 6.661, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 37947 |
|
}, |
|
{ |
|
"epoch": 139.19413919413918, |
|
"grad_norm": 0.35373228788375854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.144, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.2834372834372834, |
|
"eval_f1_macro": 0.663466069531375, |
|
"eval_f1_micro": 0.8020416843896214, |
|
"eval_loss": 0.13169734179973602, |
|
"eval_roc_auc": 0.8642539428402185, |
|
"eval_runtime": 435.0147, |
|
"eval_samples_per_second": 6.634, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 1e-05, |
|
"step": 38220 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.2875952875952876, |
|
"eval_f1_macro": 0.6687691213000826, |
|
"eval_f1_micro": 0.8046521463311481, |
|
"eval_loss": 0.13089434802532196, |
|
"eval_roc_auc": 0.867299000192085, |
|
"eval_runtime": 429.8469, |
|
"eval_samples_per_second": 6.714, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 38493 |
|
}, |
|
{ |
|
"epoch": 141.02564102564102, |
|
"grad_norm": 0.2815115451812744, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1445, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.28586278586278585, |
|
"eval_f1_macro": 0.6642894279153319, |
|
"eval_f1_micro": 0.8041640110473762, |
|
"eval_loss": 0.13103386759757996, |
|
"eval_roc_auc": 0.8657067870399482, |
|
"eval_runtime": 425.5573, |
|
"eval_samples_per_second": 6.782, |
|
"eval_steps_per_second": 0.214, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 38766 |
|
}, |
|
{ |
|
"epoch": 142.85714285714286, |
|
"grad_norm": 0.3381010890007019, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1441, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.2872487872487873, |
|
"eval_f1_macro": 0.6623287859816251, |
|
"eval_f1_micro": 0.8019270122783083, |
|
"eval_loss": 0.13144278526306152, |
|
"eval_roc_auc": 0.8635436440782548, |
|
"eval_runtime": 433.7658, |
|
"eval_samples_per_second": 6.653, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 39039 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.28378378378378377, |
|
"eval_f1_macro": 0.6647534218687892, |
|
"eval_f1_micro": 0.8024974515800204, |
|
"eval_loss": 0.1311902105808258, |
|
"eval_roc_auc": 0.8649097280870156, |
|
"eval_runtime": 446.8955, |
|
"eval_samples_per_second": 6.458, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 39312 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 39312, |
|
"total_flos": 1.3598709030716368e+20, |
|
"train_loss": 0.157796386979584, |
|
"train_runtime": 249885.5342, |
|
"train_samples_per_second": 5.232, |
|
"train_steps_per_second": 0.164 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3598709030716368e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|