|
{ |
|
"best_metric": 0.12475299090147018, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-base-2024_09_03-batch-size32_epochs150_freeze/checkpoint-27573", |
|
"epoch": 111.0, |
|
"eval_steps": 500, |
|
"global_step": 30303, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2079002079002079, |
|
"eval_f1_macro": 0.5105390450682302, |
|
"eval_f1_micro": 0.73108765167112, |
|
"eval_loss": 0.17516958713531494, |
|
"eval_roc_auc": 0.8186965528786462, |
|
"eval_runtime": 453.901, |
|
"eval_samples_per_second": 6.358, |
|
"eval_steps_per_second": 0.2, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.2602289021015167, |
|
"learning_rate": 0.001, |
|
"loss": 0.2857, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.23492723492723494, |
|
"eval_f1_macro": 0.5498069096094584, |
|
"eval_f1_micro": 0.7582569600553347, |
|
"eval_loss": 0.1577771008014679, |
|
"eval_roc_auc": 0.8363419499741919, |
|
"eval_runtime": 442.4314, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.23146223146223147, |
|
"eval_f1_macro": 0.6037272443934714, |
|
"eval_f1_micro": 0.7721545657578696, |
|
"eval_loss": 0.15162432193756104, |
|
"eval_roc_auc": 0.8505384953411333, |
|
"eval_runtime": 429.5087, |
|
"eval_samples_per_second": 6.719, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.24485518038272858, |
|
"learning_rate": 0.001, |
|
"loss": 0.1764, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.24220374220374222, |
|
"eval_f1_macro": 0.613953187695023, |
|
"eval_f1_micro": 0.7649537378914902, |
|
"eval_loss": 0.15218119323253632, |
|
"eval_roc_auc": 0.8386795656946878, |
|
"eval_runtime": 440.157, |
|
"eval_samples_per_second": 6.557, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.24220374220374222, |
|
"eval_f1_macro": 0.6161642626912543, |
|
"eval_f1_micro": 0.7719928186714542, |
|
"eval_loss": 0.14836864173412323, |
|
"eval_roc_auc": 0.840338176952158, |
|
"eval_runtime": 432.0583, |
|
"eval_samples_per_second": 6.68, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.17316196858882904, |
|
"learning_rate": 0.001, |
|
"loss": 0.1677, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.2560637560637561, |
|
"eval_f1_macro": 0.6051867487843677, |
|
"eval_f1_micro": 0.775030471878809, |
|
"eval_loss": 0.14818257093429565, |
|
"eval_roc_auc": 0.8434755477910759, |
|
"eval_runtime": 432.9746, |
|
"eval_samples_per_second": 6.666, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.24185724185724186, |
|
"eval_f1_macro": 0.617739220969942, |
|
"eval_f1_micro": 0.7729166666666668, |
|
"eval_loss": 0.1486394852399826, |
|
"eval_roc_auc": 0.8431254755177443, |
|
"eval_runtime": 432.8426, |
|
"eval_samples_per_second": 6.668, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.31019529700279236, |
|
"learning_rate": 0.001, |
|
"loss": 0.1652, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.2512127512127512, |
|
"eval_f1_macro": 0.6171646674895677, |
|
"eval_f1_micro": 0.7767065175472426, |
|
"eval_loss": 0.14861202239990234, |
|
"eval_roc_auc": 0.8485322128731306, |
|
"eval_runtime": 436.0215, |
|
"eval_samples_per_second": 6.619, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.2512127512127512, |
|
"eval_f1_macro": 0.6366264906922544, |
|
"eval_f1_micro": 0.7805490458654168, |
|
"eval_loss": 0.14834754168987274, |
|
"eval_roc_auc": 0.857034765243127, |
|
"eval_runtime": 443.0125, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.1974957138299942, |
|
"learning_rate": 0.001, |
|
"loss": 0.1617, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.24532224532224534, |
|
"eval_f1_macro": 0.6081428044829309, |
|
"eval_f1_micro": 0.7682759232167399, |
|
"eval_loss": 0.15029709041118622, |
|
"eval_roc_auc": 0.8352362538484075, |
|
"eval_runtime": 434.6587, |
|
"eval_samples_per_second": 6.64, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.16183075308799744, |
|
"learning_rate": 0.001, |
|
"loss": 0.1615, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.2609147609147609, |
|
"eval_f1_macro": 0.6199915554248129, |
|
"eval_f1_micro": 0.7756647297059341, |
|
"eval_loss": 0.14407172799110413, |
|
"eval_roc_auc": 0.8408945796610934, |
|
"eval_runtime": 431.8807, |
|
"eval_samples_per_second": 6.682, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.2494802494802495, |
|
"eval_f1_macro": 0.6299207659511814, |
|
"eval_f1_micro": 0.781485559413907, |
|
"eval_loss": 0.14866559207439423, |
|
"eval_roc_auc": 0.8542998474050816, |
|
"eval_runtime": 435.7397, |
|
"eval_samples_per_second": 6.623, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.17374463379383087, |
|
"learning_rate": 0.001, |
|
"loss": 0.1614, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.25190575190575193, |
|
"eval_f1_macro": 0.6241659824597257, |
|
"eval_f1_micro": 0.7779037321241716, |
|
"eval_loss": 0.14902691543102264, |
|
"eval_roc_auc": 0.8445867301441496, |
|
"eval_runtime": 444.1918, |
|
"eval_samples_per_second": 6.497, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.26056826056826055, |
|
"eval_f1_macro": 0.6378982802249643, |
|
"eval_f1_micro": 0.7826389795829524, |
|
"eval_loss": 0.14337006211280823, |
|
"eval_roc_auc": 0.8474976901507599, |
|
"eval_runtime": 437.9843, |
|
"eval_samples_per_second": 6.589, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.18867848813533783, |
|
"learning_rate": 0.001, |
|
"loss": 0.1599, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2553707553707554, |
|
"eval_f1_macro": 0.639716503598517, |
|
"eval_f1_micro": 0.7873585308562887, |
|
"eval_loss": 0.14354591071605682, |
|
"eval_roc_auc": 0.8551790656297652, |
|
"eval_runtime": 440.9279, |
|
"eval_samples_per_second": 6.545, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.25675675675675674, |
|
"eval_f1_macro": 0.6343613127126344, |
|
"eval_f1_micro": 0.7792974686292388, |
|
"eval_loss": 0.1439499706029892, |
|
"eval_roc_auc": 0.8464149986210657, |
|
"eval_runtime": 436.9638, |
|
"eval_samples_per_second": 6.605, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.16403253376483917, |
|
"learning_rate": 0.001, |
|
"loss": 0.1589, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.2543312543312543, |
|
"eval_f1_macro": 0.6422270697798029, |
|
"eval_f1_micro": 0.787784461363732, |
|
"eval_loss": 0.14478015899658203, |
|
"eval_roc_auc": 0.8595889192695618, |
|
"eval_runtime": 441.556, |
|
"eval_samples_per_second": 6.536, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.25675675675675674, |
|
"eval_f1_macro": 0.6417123667888478, |
|
"eval_f1_micro": 0.786493860845839, |
|
"eval_loss": 0.14397625625133514, |
|
"eval_roc_auc": 0.8551892735001003, |
|
"eval_runtime": 445.1896, |
|
"eval_samples_per_second": 6.483, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 1.3815889358520508, |
|
"learning_rate": 0.001, |
|
"loss": 0.1604, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.253984753984754, |
|
"eval_f1_macro": 0.6317583185615991, |
|
"eval_f1_micro": 0.7863510343356792, |
|
"eval_loss": 0.14199253916740417, |
|
"eval_roc_auc": 0.854983895441361, |
|
"eval_runtime": 435.0664, |
|
"eval_samples_per_second": 6.633, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.2588357588357588, |
|
"eval_f1_macro": 0.6408966299078661, |
|
"eval_f1_micro": 0.7868513006341401, |
|
"eval_loss": 0.14092272520065308, |
|
"eval_roc_auc": 0.8521780322337986, |
|
"eval_runtime": 434.5134, |
|
"eval_samples_per_second": 6.642, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.21049675345420837, |
|
"learning_rate": 0.001, |
|
"loss": 0.1586, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.26195426195426197, |
|
"eval_f1_macro": 0.6412583916380257, |
|
"eval_f1_micro": 0.7864882090503504, |
|
"eval_loss": 0.1425119787454605, |
|
"eval_roc_auc": 0.8561061843865996, |
|
"eval_runtime": 433.1377, |
|
"eval_samples_per_second": 6.663, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.159688800573349, |
|
"learning_rate": 0.001, |
|
"loss": 0.1587, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.23700623700623702, |
|
"eval_f1_macro": 0.6371452798177432, |
|
"eval_f1_micro": 0.7854284761587195, |
|
"eval_loss": 0.15379400551319122, |
|
"eval_roc_auc": 0.860841268018702, |
|
"eval_runtime": 439.1581, |
|
"eval_samples_per_second": 6.572, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.25571725571725573, |
|
"eval_f1_macro": 0.6390434486158698, |
|
"eval_f1_micro": 0.7841676771176165, |
|
"eval_loss": 0.1418805718421936, |
|
"eval_roc_auc": 0.8497106920533675, |
|
"eval_runtime": 434.4927, |
|
"eval_samples_per_second": 6.642, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.20623169839382172, |
|
"learning_rate": 0.001, |
|
"loss": 0.1592, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2598752598752599, |
|
"eval_f1_macro": 0.6458978920546691, |
|
"eval_f1_micro": 0.7869535635312129, |
|
"eval_loss": 0.14135514199733734, |
|
"eval_roc_auc": 0.8561374786855376, |
|
"eval_runtime": 437.945, |
|
"eval_samples_per_second": 6.59, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.26853776853776856, |
|
"eval_f1_macro": 0.6262981090846956, |
|
"eval_f1_micro": 0.786773581652009, |
|
"eval_loss": 0.13985148072242737, |
|
"eval_roc_auc": 0.8523112308572252, |
|
"eval_runtime": 433.5621, |
|
"eval_samples_per_second": 6.656, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.167380690574646, |
|
"learning_rate": 0.001, |
|
"loss": 0.1586, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.2591822591822592, |
|
"eval_f1_macro": 0.6237830069375186, |
|
"eval_f1_micro": 0.7846557710221018, |
|
"eval_loss": 0.14649754762649536, |
|
"eval_roc_auc": 0.8560739377107973, |
|
"eval_runtime": 436.7443, |
|
"eval_samples_per_second": 6.608, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.23804573804573806, |
|
"eval_f1_macro": 0.6344307952131357, |
|
"eval_f1_micro": 0.7719951506754418, |
|
"eval_loss": 0.15506784617900848, |
|
"eval_roc_auc": 0.8432688820115058, |
|
"eval_runtime": 431.1317, |
|
"eval_samples_per_second": 6.694, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.17562341690063477, |
|
"learning_rate": 0.001, |
|
"loss": 0.16, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2616077616077616, |
|
"eval_f1_macro": 0.6429949936408241, |
|
"eval_f1_micro": 0.7891238152420981, |
|
"eval_loss": 0.14431345462799072, |
|
"eval_roc_auc": 0.8549858001950897, |
|
"eval_runtime": 430.551, |
|
"eval_samples_per_second": 6.703, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.25675675675675674, |
|
"eval_f1_macro": 0.6415824285032449, |
|
"eval_f1_micro": 0.7873995663818392, |
|
"eval_loss": 0.14275498688220978, |
|
"eval_roc_auc": 0.8564611929231155, |
|
"eval_runtime": 443.3199, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.16407011449337006, |
|
"learning_rate": 0.001, |
|
"loss": 0.1589, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.2525987525987526, |
|
"eval_f1_macro": 0.6308133523221491, |
|
"eval_f1_micro": 0.7798808735936467, |
|
"eval_loss": 0.14164045453071594, |
|
"eval_roc_auc": 0.8425187578001007, |
|
"eval_runtime": 435.3871, |
|
"eval_samples_per_second": 6.629, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.26888426888426886, |
|
"eval_f1_macro": 0.6431010910213645, |
|
"eval_f1_micro": 0.7895365707945718, |
|
"eval_loss": 0.13976627588272095, |
|
"eval_roc_auc": 0.8565786303801245, |
|
"eval_runtime": 429.7695, |
|
"eval_samples_per_second": 6.715, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.3522001802921295, |
|
"learning_rate": 0.001, |
|
"loss": 0.1588, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.25675675675675674, |
|
"eval_f1_macro": 0.6520927708015384, |
|
"eval_f1_micro": 0.7891036166898235, |
|
"eval_loss": 0.1448184847831726, |
|
"eval_roc_auc": 0.8600551412790717, |
|
"eval_runtime": 437.4042, |
|
"eval_samples_per_second": 6.598, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.16505810618400574, |
|
"learning_rate": 0.001, |
|
"loss": 0.1581, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.26403326403326405, |
|
"eval_f1_macro": 0.6496848321151188, |
|
"eval_f1_micro": 0.7895652173913044, |
|
"eval_loss": 0.14042973518371582, |
|
"eval_roc_auc": 0.8582461081320644, |
|
"eval_runtime": 438.9972, |
|
"eval_samples_per_second": 6.574, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.25571725571725573, |
|
"eval_f1_macro": 0.6448790211155284, |
|
"eval_f1_micro": 0.7870906828033133, |
|
"eval_loss": 0.1426127403974533, |
|
"eval_roc_auc": 0.8537051886564672, |
|
"eval_runtime": 443.0007, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.17374606430530548, |
|
"learning_rate": 0.001, |
|
"loss": 0.1578, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.262993762993763, |
|
"eval_f1_macro": 0.6428423378015612, |
|
"eval_f1_micro": 0.7846327880264532, |
|
"eval_loss": 0.14135821163654327, |
|
"eval_roc_auc": 0.8487055052211715, |
|
"eval_runtime": 439.0163, |
|
"eval_samples_per_second": 6.574, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.26784476784476785, |
|
"eval_f1_macro": 0.6434020884943297, |
|
"eval_f1_micro": 0.7834209497328063, |
|
"eval_loss": 0.14652539789676666, |
|
"eval_roc_auc": 0.848444661270401, |
|
"eval_runtime": 441.3143, |
|
"eval_samples_per_second": 6.54, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.1908567249774933, |
|
"learning_rate": 0.001, |
|
"loss": 0.1576, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2668052668052668, |
|
"eval_f1_macro": 0.6438477431550106, |
|
"eval_f1_micro": 0.792425408224331, |
|
"eval_loss": 0.13795886933803558, |
|
"eval_roc_auc": 0.8576696786814598, |
|
"eval_runtime": 438.7851, |
|
"eval_samples_per_second": 6.577, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.2636867636867637, |
|
"eval_f1_macro": 0.6475331965590188, |
|
"eval_f1_micro": 0.7892280686732029, |
|
"eval_loss": 0.13921019434928894, |
|
"eval_roc_auc": 0.8555102379171546, |
|
"eval_runtime": 440.2794, |
|
"eval_samples_per_second": 6.555, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.17312012612819672, |
|
"learning_rate": 0.001, |
|
"loss": 0.1556, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.24601524601524602, |
|
"eval_f1_macro": 0.659217552215385, |
|
"eval_f1_micro": 0.7871620243872598, |
|
"eval_loss": 0.14584119617938995, |
|
"eval_roc_auc": 0.8679696612972285, |
|
"eval_runtime": 440.7666, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.26992376992376993, |
|
"eval_f1_macro": 0.6469476365862663, |
|
"eval_f1_micro": 0.79463243873979, |
|
"eval_loss": 0.1389026641845703, |
|
"eval_roc_auc": 0.8659848006017948, |
|
"eval_runtime": 449.7344, |
|
"eval_samples_per_second": 6.417, |
|
"eval_steps_per_second": 0.202, |
|
"learning_rate": 0.001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.17165251076221466, |
|
"learning_rate": 0.001, |
|
"loss": 0.1577, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.2616077616077616, |
|
"eval_f1_macro": 0.6509894683187031, |
|
"eval_f1_micro": 0.784842032071618, |
|
"eval_loss": 0.14020991325378418, |
|
"eval_roc_auc": 0.8491298897174419, |
|
"eval_runtime": 442.6364, |
|
"eval_samples_per_second": 6.52, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.27165627165627165, |
|
"eval_f1_macro": 0.6608924914997423, |
|
"eval_f1_micro": 0.7927685516081564, |
|
"eval_loss": 0.14042720198631287, |
|
"eval_roc_auc": 0.8624648827798459, |
|
"eval_runtime": 447.1222, |
|
"eval_samples_per_second": 6.455, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 0.001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.16945631802082062, |
|
"learning_rate": 0.001, |
|
"loss": 0.1576, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2695772695772696, |
|
"eval_f1_macro": 0.6427022769326964, |
|
"eval_f1_micro": 0.7930726352070125, |
|
"eval_loss": 0.13943640887737274, |
|
"eval_roc_auc": 0.8592814088524369, |
|
"eval_runtime": 443.0609, |
|
"eval_samples_per_second": 6.514, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.17723415791988373, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1543, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.27546777546777546, |
|
"eval_f1_macro": 0.6567716426576066, |
|
"eval_f1_micro": 0.7989137353078458, |
|
"eval_loss": 0.1367315948009491, |
|
"eval_roc_auc": 0.8632369250728903, |
|
"eval_runtime": 444.9892, |
|
"eval_samples_per_second": 6.486, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.28274428274428276, |
|
"eval_f1_macro": 0.6686203083248894, |
|
"eval_f1_micro": 0.8018308187828446, |
|
"eval_loss": 0.13616175949573517, |
|
"eval_roc_auc": 0.8651714707596159, |
|
"eval_runtime": 435.6671, |
|
"eval_samples_per_second": 6.624, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.15646368265151978, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1481, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2851697851697852, |
|
"eval_f1_macro": 0.6640104860714046, |
|
"eval_f1_micro": 0.8021852369457503, |
|
"eval_loss": 0.13375289738178253, |
|
"eval_roc_auc": 0.8655685984983028, |
|
"eval_runtime": 439.641, |
|
"eval_samples_per_second": 6.564, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2785862785862786, |
|
"eval_f1_macro": 0.65726703563479, |
|
"eval_f1_micro": 0.7998804746862461, |
|
"eval_loss": 0.14095526933670044, |
|
"eval_roc_auc": 0.8620771714997334, |
|
"eval_runtime": 432.1404, |
|
"eval_samples_per_second": 6.678, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.14561912417411804, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1472, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.28482328482328484, |
|
"eval_f1_macro": 0.6728387979723557, |
|
"eval_f1_micro": 0.8044442566853957, |
|
"eval_loss": 0.13375185430049896, |
|
"eval_roc_auc": 0.8674991066436737, |
|
"eval_runtime": 444.9787, |
|
"eval_samples_per_second": 6.486, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.2855162855162855, |
|
"eval_f1_macro": 0.674164075762875, |
|
"eval_f1_micro": 0.8058309037900874, |
|
"eval_loss": 0.13221527636051178, |
|
"eval_roc_auc": 0.8723556652741397, |
|
"eval_runtime": 443.2861, |
|
"eval_samples_per_second": 6.51, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.18880312144756317, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1448, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.28967428967428965, |
|
"eval_f1_macro": 0.6738599949249782, |
|
"eval_f1_micro": 0.8062985513331933, |
|
"eval_loss": 0.13315953314304352, |
|
"eval_roc_auc": 0.8702548292213903, |
|
"eval_runtime": 434.9136, |
|
"eval_samples_per_second": 6.636, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.28967428967428965, |
|
"eval_f1_macro": 0.6770873238469556, |
|
"eval_f1_micro": 0.8062836021505377, |
|
"eval_loss": 0.13057135045528412, |
|
"eval_roc_auc": 0.8701618987014408, |
|
"eval_runtime": 431.3628, |
|
"eval_samples_per_second": 6.69, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.17863284051418304, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1432, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.2872487872487873, |
|
"eval_f1_macro": 0.6726562275384118, |
|
"eval_f1_micro": 0.8043922369765066, |
|
"eval_loss": 0.13108478486537933, |
|
"eval_roc_auc": 0.8653604706190395, |
|
"eval_runtime": 432.9945, |
|
"eval_samples_per_second": 6.665, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.2872487872487873, |
|
"eval_f1_macro": 0.6702824874792834, |
|
"eval_f1_micro": 0.8070734160241367, |
|
"eval_loss": 0.13161474466323853, |
|
"eval_roc_auc": 0.8712745968092424, |
|
"eval_runtime": 437.0691, |
|
"eval_samples_per_second": 6.603, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.15456052124500275, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1438, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.2882882882882883, |
|
"eval_f1_macro": 0.6787531928667037, |
|
"eval_f1_micro": 0.8064162093710426, |
|
"eval_loss": 0.1315840184688568, |
|
"eval_roc_auc": 0.8688063579069815, |
|
"eval_runtime": 432.9273, |
|
"eval_samples_per_second": 6.666, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.20175035297870636, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1417, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.2875952875952876, |
|
"eval_f1_macro": 0.6698514928377199, |
|
"eval_f1_micro": 0.8061478697800111, |
|
"eval_loss": 0.13084293901920319, |
|
"eval_roc_auc": 0.8685749571564548, |
|
"eval_runtime": 434.5176, |
|
"eval_samples_per_second": 6.642, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.29417879417879417, |
|
"eval_f1_macro": 0.6799502024965028, |
|
"eval_f1_micro": 0.8094286190238215, |
|
"eval_loss": 0.12969879806041718, |
|
"eval_roc_auc": 0.8743735506433774, |
|
"eval_runtime": 432.6673, |
|
"eval_samples_per_second": 6.67, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.2052290290594101, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1415, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.2934857934857935, |
|
"eval_f1_macro": 0.6716759101412201, |
|
"eval_f1_micro": 0.8086806577785254, |
|
"eval_loss": 0.1296372264623642, |
|
"eval_roc_auc": 0.8711020824592034, |
|
"eval_runtime": 430.2029, |
|
"eval_samples_per_second": 6.708, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.29244629244629244, |
|
"eval_f1_macro": 0.6784509633805341, |
|
"eval_f1_micro": 0.8068982880161129, |
|
"eval_loss": 0.12973745167255402, |
|
"eval_roc_auc": 0.8707904271906546, |
|
"eval_runtime": 437.8438, |
|
"eval_samples_per_second": 6.591, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.18146342039108276, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1413, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.2910602910602911, |
|
"eval_f1_macro": 0.6811347101829983, |
|
"eval_f1_micro": 0.8087436297013858, |
|
"eval_loss": 0.12995606660842896, |
|
"eval_roc_auc": 0.8707232376735605, |
|
"eval_runtime": 440.1545, |
|
"eval_samples_per_second": 6.557, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.28794178794178793, |
|
"eval_f1_macro": 0.6725887638706813, |
|
"eval_f1_micro": 0.8056052474657126, |
|
"eval_loss": 0.13024823367595673, |
|
"eval_roc_auc": 0.8658006699367622, |
|
"eval_runtime": 442.8738, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.18909117579460144, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1404, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.2948717948717949, |
|
"eval_f1_macro": 0.6842961167409227, |
|
"eval_f1_micro": 0.8095537925534148, |
|
"eval_loss": 0.12872998416423798, |
|
"eval_roc_auc": 0.8721413670658958, |
|
"eval_runtime": 442.546, |
|
"eval_samples_per_second": 6.521, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.29002079002079, |
|
"eval_f1_macro": 0.6821531683206365, |
|
"eval_f1_micro": 0.8079526226734349, |
|
"eval_loss": 0.12909561395645142, |
|
"eval_roc_auc": 0.8690393280672706, |
|
"eval_runtime": 439.5646, |
|
"eval_samples_per_second": 6.566, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.18229062855243683, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1393, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.6812919501021206, |
|
"eval_f1_micro": 0.8075538806791719, |
|
"eval_loss": 0.12872986495494843, |
|
"eval_roc_auc": 0.8685427641356871, |
|
"eval_runtime": 439.5217, |
|
"eval_samples_per_second": 6.566, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.2959112959112959, |
|
"eval_f1_macro": 0.6805602232602442, |
|
"eval_f1_micro": 0.8090726144558109, |
|
"eval_loss": 0.12864243984222412, |
|
"eval_roc_auc": 0.8722296430115927, |
|
"eval_runtime": 436.4112, |
|
"eval_samples_per_second": 6.613, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.2255202978849411, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1395, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.29313929313929316, |
|
"eval_f1_macro": 0.6837997472607307, |
|
"eval_f1_micro": 0.809268560334276, |
|
"eval_loss": 0.12800218164920807, |
|
"eval_roc_auc": 0.8704023269115411, |
|
"eval_runtime": 436.6673, |
|
"eval_samples_per_second": 6.609, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.20691530406475067, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1389, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.2959112959112959, |
|
"eval_f1_macro": 0.685457875933014, |
|
"eval_f1_micro": 0.8107521495951249, |
|
"eval_loss": 0.12777170538902283, |
|
"eval_roc_auc": 0.8744479165213509, |
|
"eval_runtime": 443.2239, |
|
"eval_samples_per_second": 6.511, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.2948717948717949, |
|
"eval_f1_macro": 0.6849396578990685, |
|
"eval_f1_micro": 0.8098450774612694, |
|
"eval_loss": 0.12816764414310455, |
|
"eval_roc_auc": 0.874604334361006, |
|
"eval_runtime": 435.2924, |
|
"eval_samples_per_second": 6.63, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.23671405017375946, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1376, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.6903099963278952, |
|
"eval_f1_micro": 0.8123470107455503, |
|
"eval_loss": 0.12804801762104034, |
|
"eval_roc_auc": 0.8771048199390693, |
|
"eval_runtime": 435.6555, |
|
"eval_samples_per_second": 6.625, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6800351861453543, |
|
"eval_f1_micro": 0.8104663431103608, |
|
"eval_loss": 0.12803924083709717, |
|
"eval_roc_auc": 0.8710816667185332, |
|
"eval_runtime": 435.4705, |
|
"eval_samples_per_second": 6.627, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.19641809165477753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1375, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.29313929313929316, |
|
"eval_f1_macro": 0.684802818649885, |
|
"eval_f1_micro": 0.8096462751380749, |
|
"eval_loss": 0.12764029204845428, |
|
"eval_roc_auc": 0.8709255305678097, |
|
"eval_runtime": 438.0603, |
|
"eval_samples_per_second": 6.588, |
|
"eval_steps_per_second": 0.208, |
|
"learning_rate": 0.0001, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.29036729036729036, |
|
"eval_f1_macro": 0.6796736257485385, |
|
"eval_f1_micro": 0.8072724183339705, |
|
"eval_loss": 0.12794704735279083, |
|
"eval_roc_auc": 0.8674678659997703, |
|
"eval_runtime": 438.6839, |
|
"eval_samples_per_second": 6.579, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.23725061118602753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1368, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.2938322938322938, |
|
"eval_f1_macro": 0.6802343842914587, |
|
"eval_f1_micro": 0.8102650399663442, |
|
"eval_loss": 0.12780210375785828, |
|
"eval_roc_auc": 0.8718861300662425, |
|
"eval_runtime": 436.1264, |
|
"eval_samples_per_second": 6.617, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.29764379764379767, |
|
"eval_f1_macro": 0.6805723882610378, |
|
"eval_f1_micro": 0.8091473263623224, |
|
"eval_loss": 0.12723641097545624, |
|
"eval_roc_auc": 0.8683039589903839, |
|
"eval_runtime": 430.6869, |
|
"eval_samples_per_second": 6.701, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.2105712741613388, |
|
"learning_rate": 0.0001, |
|
"loss": 0.137, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.2934857934857935, |
|
"eval_f1_macro": 0.6777188921642516, |
|
"eval_f1_micro": 0.8064391831142698, |
|
"eval_loss": 0.12804573774337769, |
|
"eval_roc_auc": 0.8647877433658233, |
|
"eval_runtime": 435.4406, |
|
"eval_samples_per_second": 6.628, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.29244629244629244, |
|
"eval_f1_macro": 0.6885203936930924, |
|
"eval_f1_micro": 0.8109922383050138, |
|
"eval_loss": 0.1273234635591507, |
|
"eval_roc_auc": 0.8730787473480999, |
|
"eval_runtime": 433.0115, |
|
"eval_samples_per_second": 6.665, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.24889850616455078, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1367, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.6810578369044884, |
|
"eval_f1_micro": 0.8088975345709815, |
|
"eval_loss": 0.1272992193698883, |
|
"eval_roc_auc": 0.8696399827660029, |
|
"eval_runtime": 432.2403, |
|
"eval_samples_per_second": 6.677, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 0.0001, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.24539624154567719, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1358, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.29244629244629244, |
|
"eval_f1_macro": 0.6863183190306963, |
|
"eval_f1_micro": 0.8102101349375445, |
|
"eval_loss": 0.12745273113250732, |
|
"eval_roc_auc": 0.8738921845003481, |
|
"eval_runtime": 434.2537, |
|
"eval_samples_per_second": 6.646, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 0.0001, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.2945252945252945, |
|
"eval_f1_macro": 0.6897104532016692, |
|
"eval_f1_micro": 0.8121675531914894, |
|
"eval_loss": 0.12705788016319275, |
|
"eval_roc_auc": 0.876527719800532, |
|
"eval_runtime": 444.7222, |
|
"eval_samples_per_second": 6.489, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.23895101249217987, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1352, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.2934857934857935, |
|
"eval_f1_macro": 0.6881838490414868, |
|
"eval_f1_micro": 0.809842452990005, |
|
"eval_loss": 0.12710121273994446, |
|
"eval_roc_auc": 0.869729211088564, |
|
"eval_runtime": 435.3871, |
|
"eval_samples_per_second": 6.629, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.2983367983367983, |
|
"eval_f1_macro": 0.6914032136958002, |
|
"eval_f1_micro": 0.8123911420751431, |
|
"eval_loss": 0.12715762853622437, |
|
"eval_roc_auc": 0.8772640412542829, |
|
"eval_runtime": 435.2387, |
|
"eval_samples_per_second": 6.631, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 0.0001, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.28309133648872375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1353, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.6899389708752343, |
|
"eval_f1_micro": 0.810378232667846, |
|
"eval_loss": 0.12650521099567413, |
|
"eval_roc_auc": 0.8716067214734959, |
|
"eval_runtime": 440.2197, |
|
"eval_samples_per_second": 6.556, |
|
"eval_steps_per_second": 0.207, |
|
"learning_rate": 0.0001, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.29140679140679143, |
|
"eval_f1_macro": 0.6844864031747653, |
|
"eval_f1_micro": 0.8105446364138047, |
|
"eval_loss": 0.12635371088981628, |
|
"eval_roc_auc": 0.8694408286912787, |
|
"eval_runtime": 441.9602, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.24017925560474396, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1337, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.2934857934857935, |
|
"eval_f1_macro": 0.6832392344549459, |
|
"eval_f1_micro": 0.8099670022844573, |
|
"eval_loss": 0.1272997260093689, |
|
"eval_roc_auc": 0.8701139445142557, |
|
"eval_runtime": 442.0471, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.2959112959112959, |
|
"eval_f1_macro": 0.6944491344986764, |
|
"eval_f1_micro": 0.8124478558318038, |
|
"eval_loss": 0.12640425562858582, |
|
"eval_roc_auc": 0.875556912431764, |
|
"eval_runtime": 443.3633, |
|
"eval_samples_per_second": 6.509, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.22998856008052826, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1354, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.6879519222426981, |
|
"eval_f1_micro": 0.812659392115055, |
|
"eval_loss": 0.12647400796413422, |
|
"eval_roc_auc": 0.8750247349709565, |
|
"eval_runtime": 442.8099, |
|
"eval_samples_per_second": 6.517, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6933253774763921, |
|
"eval_f1_micro": 0.8135877542461731, |
|
"eval_loss": 0.12585221230983734, |
|
"eval_roc_auc": 0.8746291766420319, |
|
"eval_runtime": 440.9089, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.29592010378837585, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1334, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.6882459007361815, |
|
"eval_f1_micro": 0.8111366966715512, |
|
"eval_loss": 0.12641744315624237, |
|
"eval_roc_auc": 0.8737689120583252, |
|
"eval_runtime": 444.5125, |
|
"eval_samples_per_second": 6.493, |
|
"eval_steps_per_second": 0.205, |
|
"learning_rate": 0.0001, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.252650648355484, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1335, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.6859575429209334, |
|
"eval_f1_micro": 0.8126931106471816, |
|
"eval_loss": 0.1263686865568161, |
|
"eval_roc_auc": 0.8754230967754396, |
|
"eval_runtime": 448.2495, |
|
"eval_samples_per_second": 6.438, |
|
"eval_steps_per_second": 0.203, |
|
"learning_rate": 0.0001, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.2983367983367983, |
|
"eval_f1_macro": 0.6990366097632199, |
|
"eval_f1_micro": 0.8140188460902628, |
|
"eval_loss": 0.12690132856369019, |
|
"eval_roc_auc": 0.8791768834795075, |
|
"eval_runtime": 441.4136, |
|
"eval_samples_per_second": 6.538, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.2610660791397095, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1332, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.6994167448254883, |
|
"eval_f1_micro": 0.8155163144617673, |
|
"eval_loss": 0.12612390518188477, |
|
"eval_roc_auc": 0.8798283494960182, |
|
"eval_runtime": 445.6794, |
|
"eval_samples_per_second": 6.476, |
|
"eval_steps_per_second": 0.204, |
|
"learning_rate": 0.0001, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.28932778932778935, |
|
"eval_f1_macro": 0.6827913109763548, |
|
"eval_f1_micro": 0.8108811552831535, |
|
"eval_loss": 0.1268243044614792, |
|
"eval_roc_auc": 0.8728467295646753, |
|
"eval_runtime": 442.1487, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 0.0001, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.57509157509158, |
|
"grad_norm": 0.28306326270103455, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1326, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6858483939371968, |
|
"eval_f1_micro": 0.8123787840458724, |
|
"eval_loss": 0.12613284587860107, |
|
"eval_roc_auc": 0.8724090520335794, |
|
"eval_runtime": 447.7751, |
|
"eval_samples_per_second": 6.445, |
|
"eval_steps_per_second": 0.203, |
|
"learning_rate": 0.0001, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.6897216822080747, |
|
"eval_f1_micro": 0.8138213420238991, |
|
"eval_loss": 0.1258293092250824, |
|
"eval_roc_auc": 0.8758502847472687, |
|
"eval_runtime": 442.7745, |
|
"eval_samples_per_second": 6.518, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 1e-05, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 93.4065934065934, |
|
"grad_norm": 0.36196333169937134, |
|
"learning_rate": 1e-05, |
|
"loss": 0.132, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.29764379764379767, |
|
"eval_f1_macro": 0.6940665827082791, |
|
"eval_f1_micro": 0.8137706015226304, |
|
"eval_loss": 0.12682591378688812, |
|
"eval_roc_auc": 0.8754785626674707, |
|
"eval_runtime": 447.9681, |
|
"eval_samples_per_second": 6.442, |
|
"eval_steps_per_second": 0.203, |
|
"learning_rate": 1e-05, |
|
"step": 25662 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.2948717948717949, |
|
"eval_f1_macro": 0.6913394393323408, |
|
"eval_f1_micro": 0.8133975298304374, |
|
"eval_loss": 0.1256789118051529, |
|
"eval_roc_auc": 0.8750151441335194, |
|
"eval_runtime": 494.4109, |
|
"eval_samples_per_second": 5.837, |
|
"eval_steps_per_second": 0.184, |
|
"learning_rate": 1e-05, |
|
"step": 25935 |
|
}, |
|
{ |
|
"epoch": 95.23809523809524, |
|
"grad_norm": 0.28360626101493835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1294, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.29764379764379767, |
|
"eval_f1_macro": 0.6957055849225957, |
|
"eval_f1_micro": 0.8147281313996739, |
|
"eval_loss": 0.12587758898735046, |
|
"eval_roc_auc": 0.876265078406102, |
|
"eval_runtime": 456.43, |
|
"eval_samples_per_second": 6.323, |
|
"eval_steps_per_second": 0.199, |
|
"learning_rate": 1e-05, |
|
"step": 26208 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.2945252945252945, |
|
"eval_f1_macro": 0.6940781337907567, |
|
"eval_f1_micro": 0.8126029480086159, |
|
"eval_loss": 0.1256256103515625, |
|
"eval_roc_auc": 0.8720295902150387, |
|
"eval_runtime": 440.8704, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 1e-05, |
|
"step": 26481 |
|
}, |
|
{ |
|
"epoch": 97.06959706959707, |
|
"grad_norm": 0.29758304357528687, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1302, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.2993762993762994, |
|
"eval_f1_macro": 0.6951390304078455, |
|
"eval_f1_micro": 0.8158955813276801, |
|
"eval_loss": 0.1253080666065216, |
|
"eval_roc_auc": 0.8785118427392398, |
|
"eval_runtime": 432.3413, |
|
"eval_samples_per_second": 6.675, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 26754 |
|
}, |
|
{ |
|
"epoch": 98.9010989010989, |
|
"grad_norm": 0.3460623621940613, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1298, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.2993762993762994, |
|
"eval_f1_macro": 0.6968244403216463, |
|
"eval_f1_micro": 0.8141971169963125, |
|
"eval_loss": 0.12485036998987198, |
|
"eval_roc_auc": 0.8751979200089282, |
|
"eval_runtime": 440.8458, |
|
"eval_samples_per_second": 6.547, |
|
"eval_steps_per_second": 0.206, |
|
"learning_rate": 1e-05, |
|
"step": 27027 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.693647218520028, |
|
"eval_f1_micro": 0.8134507606084869, |
|
"eval_loss": 0.12519583106040955, |
|
"eval_roc_auc": 0.8731552996687338, |
|
"eval_runtime": 426.3213, |
|
"eval_samples_per_second": 6.77, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 1e-05, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 100.73260073260073, |
|
"grad_norm": 0.2845664918422699, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1304, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.6961023046950545, |
|
"eval_f1_micro": 0.8148550421923302, |
|
"eval_loss": 0.12475299090147018, |
|
"eval_roc_auc": 0.8764704564648416, |
|
"eval_runtime": 432.286, |
|
"eval_samples_per_second": 6.676, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1e-05, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.29625779625779625, |
|
"eval_f1_macro": 0.692743439816851, |
|
"eval_f1_micro": 0.81366198367965, |
|
"eval_loss": 0.12659381330013275, |
|
"eval_roc_auc": 0.8737986906025352, |
|
"eval_runtime": 429.8933, |
|
"eval_samples_per_second": 6.713, |
|
"eval_steps_per_second": 0.212, |
|
"learning_rate": 1e-05, |
|
"step": 27846 |
|
}, |
|
{ |
|
"epoch": 102.56410256410257, |
|
"grad_norm": 0.276334673166275, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1287, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.6954353634647259, |
|
"eval_f1_micro": 0.8146347596496376, |
|
"eval_loss": 0.124935083091259, |
|
"eval_roc_auc": 0.8754020640018715, |
|
"eval_runtime": 433.4496, |
|
"eval_samples_per_second": 6.658, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 28119 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.29764379764379767, |
|
"eval_f1_macro": 0.692659001716947, |
|
"eval_f1_micro": 0.8148796863922599, |
|
"eval_loss": 0.12519653141498566, |
|
"eval_roc_auc": 0.8769883885139594, |
|
"eval_runtime": 435.0406, |
|
"eval_samples_per_second": 6.634, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 1e-05, |
|
"step": 28392 |
|
}, |
|
{ |
|
"epoch": 104.3956043956044, |
|
"grad_norm": 0.2987622320652008, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1282, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.6961790886935857, |
|
"eval_f1_micro": 0.8152223750573132, |
|
"eval_loss": 0.12513257563114166, |
|
"eval_roc_auc": 0.8773257311303291, |
|
"eval_runtime": 430.7348, |
|
"eval_samples_per_second": 6.7, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1e-05, |
|
"step": 28665 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.29972279972279975, |
|
"eval_f1_macro": 0.6963861386142265, |
|
"eval_f1_micro": 0.8147252563995664, |
|
"eval_loss": 0.12511174380779266, |
|
"eval_roc_auc": 0.8769890055468574, |
|
"eval_runtime": 434.1489, |
|
"eval_samples_per_second": 6.647, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1e-05, |
|
"step": 28938 |
|
}, |
|
{ |
|
"epoch": 106.22710622710623, |
|
"grad_norm": 0.2862643301486969, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1293, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.694620567930595, |
|
"eval_f1_micro": 0.8144894800685992, |
|
"eval_loss": 0.12498941272497177, |
|
"eval_roc_auc": 0.8758874676862554, |
|
"eval_runtime": 435.6169, |
|
"eval_samples_per_second": 6.625, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 1e-05, |
|
"step": 29211 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.29972279972279975, |
|
"eval_f1_macro": 0.6934713387989168, |
|
"eval_f1_micro": 0.8144792584203683, |
|
"eval_loss": 0.1248873621225357, |
|
"eval_roc_auc": 0.8750706100255504, |
|
"eval_runtime": 426.658, |
|
"eval_samples_per_second": 6.764, |
|
"eval_steps_per_second": 0.213, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 29484 |
|
}, |
|
{ |
|
"epoch": 108.05860805860806, |
|
"grad_norm": 0.2752939760684967, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.129, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6900779361953018, |
|
"eval_f1_micro": 0.8116150302210575, |
|
"eval_loss": 0.12527066469192505, |
|
"eval_roc_auc": 0.8712697142010926, |
|
"eval_runtime": 434.9611, |
|
"eval_samples_per_second": 6.635, |
|
"eval_steps_per_second": 0.209, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 29757 |
|
}, |
|
{ |
|
"epoch": 109.89010989010988, |
|
"grad_norm": 0.2989753484725952, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1293, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.29799029799029797, |
|
"eval_f1_macro": 0.69491512245201, |
|
"eval_f1_micro": 0.8143917285082964, |
|
"eval_loss": 0.125152125954628, |
|
"eval_roc_auc": 0.8768043785727546, |
|
"eval_runtime": 434.3393, |
|
"eval_samples_per_second": 6.645, |
|
"eval_steps_per_second": 0.21, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 30030 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.2983367983367983, |
|
"eval_f1_macro": 0.6932228755688746, |
|
"eval_f1_micro": 0.8137025263510123, |
|
"eval_loss": 0.12495684623718262, |
|
"eval_roc_auc": 0.8754655513215771, |
|
"eval_runtime": 431.8608, |
|
"eval_samples_per_second": 6.683, |
|
"eval_steps_per_second": 0.211, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 30303 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 30303, |
|
"total_flos": 4.1153080208666034e+20, |
|
"train_loss": 0.14764341744551096, |
|
"train_runtime": 198162.6547, |
|
"train_samples_per_second": 6.598, |
|
"train_steps_per_second": 0.207 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.1153080208666034e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|