|
{ |
|
"best_metric": 0.9783398772157638, |
|
"best_model_checkpoint": "models/pos_final_mono_nl/checkpoint-2760", |
|
"epoch": 39.99638989169675, |
|
"global_step": 2760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2575113142718755, |
|
"eval_f1": 0.16891972475874908, |
|
"eval_loss": 3.770303726196289, |
|
"eval_precision": 0.25972286447785947, |
|
"eval_recall": 0.12516136964406466, |
|
"eval_runtime": 10.2346, |
|
"eval_samples_per_second": 768.865, |
|
"eval_steps_per_second": 3.029, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8066356880136831, |
|
"eval_f1": 0.8033051608452517, |
|
"eval_loss": 1.0147907733917236, |
|
"eval_precision": 0.8057864338897609, |
|
"eval_recall": 0.8008391221491363, |
|
"eval_runtime": 10.7861, |
|
"eval_samples_per_second": 729.548, |
|
"eval_steps_per_second": 2.874, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.929904490895606, |
|
"eval_f1": 0.9289770104330163, |
|
"eval_loss": 0.3402073085308075, |
|
"eval_precision": 0.9301506840872673, |
|
"eval_recall": 0.9278062949529723, |
|
"eval_runtime": 10.421, |
|
"eval_samples_per_second": 755.108, |
|
"eval_steps_per_second": 2.975, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9560597575188824, |
|
"eval_f1": 0.9555069516784352, |
|
"eval_loss": 0.20157238841056824, |
|
"eval_precision": 0.9559442226785728, |
|
"eval_recall": 0.9550700805311366, |
|
"eval_runtime": 10.4533, |
|
"eval_samples_per_second": 752.775, |
|
"eval_steps_per_second": 2.966, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.964831156250473, |
|
"eval_f1": 0.964063779010111, |
|
"eval_loss": 0.14858682453632355, |
|
"eval_precision": 0.9642898327887757, |
|
"eval_recall": 0.9638378311919837, |
|
"eval_runtime": 11.0648, |
|
"eval_samples_per_second": 711.175, |
|
"eval_steps_per_second": 2.802, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9701515128581591, |
|
"eval_f1": 0.9696606418295832, |
|
"eval_loss": 0.12055634707212448, |
|
"eval_precision": 0.9697202582231786, |
|
"eval_recall": 0.969601032765722, |
|
"eval_runtime": 10.8285, |
|
"eval_samples_per_second": 726.696, |
|
"eval_steps_per_second": 2.863, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.972701954076922, |
|
"eval_f1": 0.9719634068091613, |
|
"eval_loss": 0.10631231963634491, |
|
"eval_precision": 0.9719820795967141, |
|
"eval_recall": 0.9719447347390422, |
|
"eval_runtime": 10.4365, |
|
"eval_samples_per_second": 753.985, |
|
"eval_steps_per_second": 2.97, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2192, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9742231371183798, |
|
"eval_f1": 0.9734790710183069, |
|
"eval_loss": 0.09831023961305618, |
|
"eval_precision": 0.9734304527887268, |
|
"eval_recall": 0.973527694104629, |
|
"eval_runtime": 10.9399, |
|
"eval_samples_per_second": 719.291, |
|
"eval_steps_per_second": 2.834, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.975388620642681, |
|
"eval_f1": 0.9746202254443189, |
|
"eval_loss": 0.09469176828861237, |
|
"eval_precision": 0.974567806377257, |
|
"eval_recall": 0.9746726501506117, |
|
"eval_runtime": 10.9888, |
|
"eval_samples_per_second": 716.095, |
|
"eval_steps_per_second": 2.821, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9761454281259934, |
|
"eval_f1": 0.9753816837883316, |
|
"eval_loss": 0.09128155559301376, |
|
"eval_precision": 0.9752992516787289, |
|
"eval_recall": 0.975464129833405, |
|
"eval_runtime": 10.3458, |
|
"eval_samples_per_second": 760.6, |
|
"eval_steps_per_second": 2.996, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9770081886569695, |
|
"eval_f1": 0.9762231314470121, |
|
"eval_loss": 0.08845613151788712, |
|
"eval_precision": 0.9761368787406173, |
|
"eval_recall": 0.9763093993975533, |
|
"eval_runtime": 10.591, |
|
"eval_samples_per_second": 742.993, |
|
"eval_steps_per_second": 2.927, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9772200947522969, |
|
"eval_f1": 0.9764375477837924, |
|
"eval_loss": 0.08773986995220184, |
|
"eval_precision": 0.9763812802053, |
|
"eval_recall": 0.976493821847913, |
|
"eval_runtime": 10.1848, |
|
"eval_samples_per_second": 772.621, |
|
"eval_steps_per_second": 3.044, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.977530385820455, |
|
"eval_f1": 0.9767615183960107, |
|
"eval_loss": 0.0878407210111618, |
|
"eval_precision": 0.9766527100218952, |
|
"eval_recall": 0.9768703510173972, |
|
"eval_runtime": 10.414, |
|
"eval_samples_per_second": 755.619, |
|
"eval_steps_per_second": 2.977, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9775909304191199, |
|
"eval_f1": 0.9767761714683069, |
|
"eval_loss": 0.08732089400291443, |
|
"eval_precision": 0.9766973731723995, |
|
"eval_recall": 0.9768549824798672, |
|
"eval_runtime": 10.2517, |
|
"eval_samples_per_second": 767.579, |
|
"eval_steps_per_second": 3.024, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.893805309734514e-05, |
|
"loss": 0.0688, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9779466299362768, |
|
"eval_f1": 0.9771878601613523, |
|
"eval_loss": 0.08772371709346771, |
|
"eval_precision": 0.9770827571371501, |
|
"eval_recall": 0.9772929857994713, |
|
"eval_runtime": 10.4702, |
|
"eval_samples_per_second": 751.563, |
|
"eval_steps_per_second": 2.961, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9781282637322718, |
|
"eval_f1": 0.9773326264186318, |
|
"eval_loss": 0.08782745897769928, |
|
"eval_precision": 0.9772800614675374, |
|
"eval_recall": 0.9773851970246511, |
|
"eval_runtime": 10.0217, |
|
"eval_samples_per_second": 785.196, |
|
"eval_steps_per_second": 3.093, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9780601510587736, |
|
"eval_f1": 0.9772529072559422, |
|
"eval_loss": 0.08969255536794662, |
|
"eval_precision": 0.9771590568603499, |
|
"eval_recall": 0.9773467756808262, |
|
"eval_runtime": 10.2237, |
|
"eval_samples_per_second": 769.681, |
|
"eval_steps_per_second": 3.032, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.978325033677933, |
|
"eval_f1": 0.9775826100987749, |
|
"eval_loss": 0.09088694304227829, |
|
"eval_precision": 0.9775187663749587, |
|
"eval_recall": 0.9776464621626606, |
|
"eval_runtime": 10.2448, |
|
"eval_samples_per_second": 768.099, |
|
"eval_steps_per_second": 3.026, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9784536909500962, |
|
"eval_f1": 0.9776833564477773, |
|
"eval_loss": 0.09170977026224136, |
|
"eval_precision": 0.9775819549334296, |
|
"eval_recall": 0.9777847790004304, |
|
"eval_runtime": 10.5329, |
|
"eval_samples_per_second": 747.089, |
|
"eval_steps_per_second": 2.943, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.978695869344756, |
|
"eval_f1": 0.977932645393286, |
|
"eval_loss": 0.09237655997276306, |
|
"eval_precision": 0.977849997695109, |
|
"eval_recall": 0.9780153070633798, |
|
"eval_runtime": 10.1755, |
|
"eval_samples_per_second": 773.331, |
|
"eval_steps_per_second": 3.047, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9785369397732605, |
|
"eval_f1": 0.9777796559381027, |
|
"eval_loss": 0.09489051252603531, |
|
"eval_precision": 0.9776669790882412, |
|
"eval_recall": 0.9778923587631401, |
|
"eval_runtime": 10.3245, |
|
"eval_samples_per_second": 762.165, |
|
"eval_steps_per_second": 3.003, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 2.7876106194690264e-05, |
|
"loss": 0.0366, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9783931463514312, |
|
"eval_f1": 0.977655520039341, |
|
"eval_loss": 0.09559858590364456, |
|
"eval_precision": 0.9775954268854877, |
|
"eval_recall": 0.9777156205815455, |
|
"eval_runtime": 10.2656, |
|
"eval_samples_per_second": 766.54, |
|
"eval_steps_per_second": 3.02, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9785899162970924, |
|
"eval_f1": 0.9778600762968741, |
|
"eval_loss": 0.0962114930152893, |
|
"eval_precision": 0.977758656453831, |
|
"eval_recall": 0.977961517182025, |
|
"eval_runtime": 10.2847, |
|
"eval_samples_per_second": 765.116, |
|
"eval_steps_per_second": 3.014, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.978627756671258, |
|
"eval_f1": 0.9778800497871751, |
|
"eval_loss": 0.09919747710227966, |
|
"eval_precision": 0.9777448299173401, |
|
"eval_recall": 0.9780153070633798, |
|
"eval_runtime": 10.2497, |
|
"eval_samples_per_second": 767.73, |
|
"eval_steps_per_second": 3.024, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9787488458685879, |
|
"eval_f1": 0.9779865998709163, |
|
"eval_loss": 0.09993624687194824, |
|
"eval_precision": 0.9778964351567302, |
|
"eval_recall": 0.9780767812134997, |
|
"eval_runtime": 10.4654, |
|
"eval_samples_per_second": 751.908, |
|
"eval_steps_per_second": 2.962, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9788699350659179, |
|
"eval_f1": 0.9781095368349878, |
|
"eval_loss": 0.10065959393978119, |
|
"eval_precision": 0.978019360786724, |
|
"eval_recall": 0.9781997295137395, |
|
"eval_runtime": 10.3069, |
|
"eval_samples_per_second": 763.466, |
|
"eval_steps_per_second": 3.008, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9789229115897498, |
|
"eval_f1": 0.9781592282543133, |
|
"eval_loss": 0.10217240452766418, |
|
"eval_precision": 0.97808031838472, |
|
"eval_recall": 0.9782381508575644, |
|
"eval_runtime": 10.3246, |
|
"eval_samples_per_second": 762.158, |
|
"eval_steps_per_second": 3.003, |
|
"step": 1863 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9790061604129142, |
|
"eval_f1": 0.9782287156594198, |
|
"eval_loss": 0.10301286727190018, |
|
"eval_precision": 0.9781347715521547, |
|
"eval_recall": 0.9783226778139792, |
|
"eval_runtime": 10.6685, |
|
"eval_samples_per_second": 737.591, |
|
"eval_steps_per_second": 2.906, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"learning_rate": 1.6814159292035402e-05, |
|
"loss": 0.0226, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9789456158142492, |
|
"eval_f1": 0.9781276533619175, |
|
"eval_loss": 0.10546565800905228, |
|
"eval_precision": 0.9780863177791267, |
|
"eval_recall": 0.9781689924386795, |
|
"eval_runtime": 10.2106, |
|
"eval_samples_per_second": 770.668, |
|
"eval_steps_per_second": 3.036, |
|
"step": 2001 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9788775031407511, |
|
"eval_f1": 0.9781016850177108, |
|
"eval_loss": 0.10569430887699127, |
|
"eval_precision": 0.9780190230335438, |
|
"eval_recall": 0.9781843609762095, |
|
"eval_runtime": 10.1623, |
|
"eval_samples_per_second": 774.335, |
|
"eval_steps_per_second": 3.051, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.9788169585420861, |
|
"eval_f1": 0.9780400473314586, |
|
"eval_loss": 0.10669872909784317, |
|
"eval_precision": 0.9779649036540766, |
|
"eval_recall": 0.9781152025573246, |
|
"eval_runtime": 10.228, |
|
"eval_samples_per_second": 769.357, |
|
"eval_steps_per_second": 3.031, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9788850712155842, |
|
"eval_f1": 0.9781134626983792, |
|
"eval_loss": 0.10771454125642776, |
|
"eval_precision": 0.9780195296594217, |
|
"eval_recall": 0.9782074137825044, |
|
"eval_runtime": 10.2465, |
|
"eval_samples_per_second": 767.969, |
|
"eval_steps_per_second": 3.025, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9788547989162517, |
|
"eval_f1": 0.9780702765419577, |
|
"eval_loss": 0.10846679657697678, |
|
"eval_precision": 0.9780176719170188, |
|
"eval_recall": 0.9781228868260896, |
|
"eval_runtime": 10.262, |
|
"eval_samples_per_second": 766.809, |
|
"eval_steps_per_second": 3.021, |
|
"step": 2277 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9789153435149167, |
|
"eval_f1": 0.9781402710760058, |
|
"eval_loss": 0.10942833125591278, |
|
"eval_precision": 0.9780500921942225, |
|
"eval_recall": 0.9782304665887994, |
|
"eval_runtime": 10.2289, |
|
"eval_samples_per_second": 769.29, |
|
"eval_steps_per_second": 3.031, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.9791272496102441, |
|
"eval_f1": 0.9783318606170041, |
|
"eval_loss": 0.10954407602548599, |
|
"eval_precision": 0.9782642100895862, |
|
"eval_recall": 0.978399520501629, |
|
"eval_runtime": 10.2671, |
|
"eval_samples_per_second": 766.432, |
|
"eval_steps_per_second": 3.019, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9788775031407511, |
|
"eval_f1": 0.9780747081173908, |
|
"eval_loss": 0.11010610312223434, |
|
"eval_precision": 0.9779958050661893, |
|
"eval_recall": 0.9781536239011496, |
|
"eval_runtime": 10.1692, |
|
"eval_samples_per_second": 773.807, |
|
"eval_steps_per_second": 3.048, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 36.23, |
|
"learning_rate": 5.752212389380531e-06, |
|
"loss": 0.0159, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9791045453857448, |
|
"eval_f1": 0.9783088094048946, |
|
"eval_loss": 0.11143232136964798, |
|
"eval_precision": 0.9782411604714415, |
|
"eval_recall": 0.9783764676953342, |
|
"eval_runtime": 10.184, |
|
"eval_samples_per_second": 772.684, |
|
"eval_steps_per_second": 3.044, |
|
"step": 2553 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.9791272496102441, |
|
"eval_f1": 0.9783286015589884, |
|
"eval_loss": 0.11111290007829666, |
|
"eval_precision": 0.9782346478591898, |
|
"eval_recall": 0.978422573307924, |
|
"eval_runtime": 10.011, |
|
"eval_samples_per_second": 786.035, |
|
"eval_steps_per_second": 3.097, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.9790894092360786, |
|
"eval_f1": 0.9782975339329141, |
|
"eval_loss": 0.11137838661670685, |
|
"eval_precision": 0.978218612905952, |
|
"eval_recall": 0.9783764676953342, |
|
"eval_runtime": 10.1814, |
|
"eval_samples_per_second": 772.882, |
|
"eval_steps_per_second": 3.045, |
|
"step": 2691 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9791272496102441, |
|
"eval_f1": 0.9783398772157638, |
|
"eval_loss": 0.1115424633026123, |
|
"eval_precision": 0.9782571951013384, |
|
"eval_recall": 0.978422573307924, |
|
"eval_runtime": 10.2449, |
|
"eval_samples_per_second": 768.087, |
|
"eval_steps_per_second": 3.026, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 2760, |
|
"total_flos": 1.3845487354146643e+17, |
|
"train_loss": 0.24823836001796998, |
|
"train_runtime": 2048.5615, |
|
"train_samples_per_second": 1382.668, |
|
"train_steps_per_second": 1.347 |
|
} |
|
], |
|
"max_steps": 2760, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.3845487354146643e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|