diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9015680844898091, + "epoch": 0.9999678011398396, "eval_steps": 5, - "global_step": 3500, + "global_step": 3882, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -36424,6 +36424,3972 @@ "eval_samples_per_second": 2.587, "eval_steps_per_second": 0.176, "step": 3500 + }, + { + "epoch": 0.9018256753710918, + "grad_norm": 0.14090469611175638, + "learning_rate": 2.90695757911017e-06, + "loss": 0.3593, + "step": 3501 + }, + { + "epoch": 0.9020832662523747, + "grad_norm": 0.14977731555923768, + "learning_rate": 2.8918666204450106e-06, + "loss": 0.3779, + "step": 3502 + }, + { + "epoch": 0.9023408571336574, + "grad_norm": 0.14409493620439406, + "learning_rate": 2.8768137682253837e-06, + "loss": 0.3847, + "step": 3503 + }, + { + "epoch": 0.9025984480149403, + "grad_norm": 0.17852245830856514, + "learning_rate": 2.8617990346277657e-06, + "loss": 0.3825, + "step": 3504 + }, + { + "epoch": 0.9028560388962231, + "grad_norm": 0.2081650506236442, + "learning_rate": 2.8468224317977743e-06, + "loss": 0.4107, + "step": 3505 + }, + { + "epoch": 0.9028560388962231, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8862275449101796, + "eval_PRM F1 AUC": 0.7283918281822945, + "eval_PRM F1 AUC (fixed)": 0.858040859088528, + "eval_PRM F1 Neg": 0.5777777777777777, + "eval_PRM NPV": 0.5909090909090909, + "eval_PRM Precision": 0.8809523809523809, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.5652173913043478, + "eval_loss": 0.3613725006580353, + "eval_runtime": 17.0225, + "eval_samples_per_second": 2.585, + "eval_steps_per_second": 0.176, + "step": 3505 + }, + { + "epoch": 0.9031136297775059, + "grad_norm": 0.1304410803037061, + "learning_rate": 2.831883971850213e-06, + "loss": 0.3199, + "step": 3506 + }, + { + "epoch": 0.9033712206587887, + "grad_norm": 0.3183315368975275, + "learning_rate": 2.8169836668690165e-06, + "loss": 0.517, + "step": 3507 + }, + { + "epoch": 0.9036288115400715, + "grad_norm": 0.14265599880347749, + "learning_rate": 2.802121528907242e-06, + "loss": 0.3872, + "step": 3508 + }, + { + "epoch": 0.9038864024213543, + "grad_norm": 0.15673814385728924, + "learning_rate": 2.787297569987107e-06, + "loss": 0.3693, + "step": 3509 + }, + { + "epoch": 0.9041439933026371, + "grad_norm": 0.17147029433004354, + "learning_rate": 2.7725118020998973e-06, + "loss": 0.4265, + "step": 3510 + }, + { + "epoch": 0.9041439933026371, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8580408590885279, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3582652807235718, + "eval_runtime": 17.1274, + "eval_samples_per_second": 2.569, + "eval_steps_per_second": 0.175, + "step": 3510 + }, + { + "epoch": 0.9044015841839199, + "grad_norm": 0.1294599974767688, + "learning_rate": 2.7577642372060673e-06, + "loss": 0.3366, + "step": 3511 + }, + { + "epoch": 0.9046591750652027, + "grad_norm": 0.10739872504317904, + "learning_rate": 2.7430548872351135e-06, + "loss": 0.2895, + "step": 3512 + }, + { + "epoch": 0.9049167659464855, + "grad_norm": 0.1337098599209687, + "learning_rate": 2.7283837640856346e-06, + "loss": 0.3331, + "step": 3513 + }, + { + "epoch": 0.9051743568277683, + "grad_norm": 0.15326670322972885, + "learning_rate": 2.7137508796253208e-06, + "loss": 0.3547, + "step": 3514 + }, + { + "epoch": 0.9054319477090511, + "grad_norm": 0.17439375071989902, + "learning_rate": 2.6991562456909205e-06, + "loss": 0.4062, + "step": 3515 + }, + { + "epoch": 0.9054319477090511, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8543740178103719, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3610174059867859, + "eval_runtime": 17.031, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.176, + "step": 3515 + }, + { + "epoch": 0.9056895385903339, + "grad_norm": 0.14757466196871588, + "learning_rate": 2.684599874088256e-06, + "loss": 0.4083, + "step": 3516 + }, + { + "epoch": 0.9059471294716167, + "grad_norm": 0.13038628864544433, + "learning_rate": 2.670081776592165e-06, + "loss": 0.3017, + "step": 3517 + }, + { + "epoch": 0.9062047203528996, + "grad_norm": 0.1362571289704275, + "learning_rate": 2.6556019649465525e-06, + "loss": 0.2883, + "step": 3518 + }, + { + "epoch": 0.9064623112341823, + "grad_norm": 0.19400127869035802, + "learning_rate": 2.641160450864355e-06, + "loss": 0.455, + "step": 3519 + }, + { + "epoch": 0.9067199021154652, + "grad_norm": 0.1487023689144612, + "learning_rate": 2.626757246027506e-06, + "loss": 0.3572, + "step": 3520 + }, + { + "epoch": 0.9067199021154652, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8559455212152959, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.36106178164482117, + "eval_runtime": 17.6175, + "eval_samples_per_second": 2.498, + "eval_steps_per_second": 0.17, + "step": 3520 + }, + { + "epoch": 0.9069774929967479, + "grad_norm": 0.1758953345140761, + "learning_rate": 2.6123923620869795e-06, + "loss": 0.4051, + "step": 3521 + }, + { + "epoch": 0.9072350838780308, + "grad_norm": 0.1603008294335897, + "learning_rate": 2.59806581066272e-06, + "loss": 0.3914, + "step": 3522 + }, + { + "epoch": 0.9074926747593135, + "grad_norm": 0.14475537508184771, + "learning_rate": 2.5837776033436954e-06, + "loss": 0.3153, + "step": 3523 + }, + { + "epoch": 0.9077502656405964, + "grad_norm": 0.1312022526870313, + "learning_rate": 2.5695277516878336e-06, + "loss": 0.2982, + "step": 3524 + }, + { + "epoch": 0.9080078565218791, + "grad_norm": 0.21749204403301972, + "learning_rate": 2.5553162672220465e-06, + "loss": 0.3939, + "step": 3525 + }, + { + "epoch": 0.9080078565218791, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8533263488737559, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3616832494735718, + "eval_runtime": 16.4985, + "eval_samples_per_second": 2.667, + "eval_steps_per_second": 0.182, + "step": 3525 + }, + { + "epoch": 0.908265447403162, + "grad_norm": 0.15509138928369606, + "learning_rate": 2.5411431614422053e-06, + "loss": 0.3359, + "step": 3526 + }, + { + "epoch": 0.9085230382844447, + "grad_norm": 0.12699302222708414, + "learning_rate": 2.5270084458131395e-06, + "loss": 0.2976, + "step": 3527 + }, + { + "epoch": 0.9087806291657275, + "grad_norm": 0.1781909885669849, + "learning_rate": 2.5129121317686356e-06, + "loss": 0.3845, + "step": 3528 + }, + { + "epoch": 0.9090382200470103, + "grad_norm": 0.15216059727780382, + "learning_rate": 2.4988542307113837e-06, + "loss": 0.3444, + "step": 3529 + }, + { + "epoch": 0.9092958109282931, + "grad_norm": 0.16200325190532375, + "learning_rate": 2.4848347540130434e-06, + "loss": 0.2597, + "step": 3530 + }, + { + "epoch": 0.9092958109282931, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8530644316396019, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.36110618710517883, + "eval_runtime": 16.989, + "eval_samples_per_second": 2.59, + "eval_steps_per_second": 0.177, + "step": 3530 + }, + { + "epoch": 0.9095534018095759, + "grad_norm": 0.15168951575416167, + "learning_rate": 2.4708537130141485e-06, + "loss": 0.3409, + "step": 3531 + }, + { + "epoch": 0.9098109926908587, + "grad_norm": 0.15496414073672882, + "learning_rate": 2.45691111902418e-06, + "loss": 0.3449, + "step": 3532 + }, + { + "epoch": 0.9100685835721416, + "grad_norm": 0.3104512486482539, + "learning_rate": 2.4430069833214996e-06, + "loss": 0.4948, + "step": 3533 + }, + { + "epoch": 0.9103261744534243, + "grad_norm": 0.20727427059037964, + "learning_rate": 2.429141317153355e-06, + "loss": 0.455, + "step": 3534 + }, + { + "epoch": 0.9105837653347072, + "grad_norm": 0.1696170445047076, + "learning_rate": 2.415314131735885e-06, + "loss": 0.368, + "step": 3535 + }, + { + "epoch": 0.9105837653347072, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8567312729177579, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3587535619735718, + "eval_runtime": 16.9861, + "eval_samples_per_second": 2.59, + "eval_steps_per_second": 0.177, + "step": 3535 + }, + { + "epoch": 0.9108413562159899, + "grad_norm": 0.16578645048029902, + "learning_rate": 2.4015254382541043e-06, + "loss": 0.3967, + "step": 3536 + }, + { + "epoch": 0.9110989470972728, + "grad_norm": 0.14286368348379852, + "learning_rate": 2.38777524786189e-06, + "loss": 0.2688, + "step": 3537 + }, + { + "epoch": 0.9113565379785555, + "grad_norm": 0.12512391214687887, + "learning_rate": 2.374063571681956e-06, + "loss": 0.3187, + "step": 3538 + }, + { + "epoch": 0.9116141288598384, + "grad_norm": 0.17653831694965627, + "learning_rate": 2.360390420805869e-06, + "loss": 0.391, + "step": 3539 + }, + { + "epoch": 0.9118717197411211, + "grad_norm": 0.14848179498309094, + "learning_rate": 2.34675580629406e-06, + "loss": 0.4207, + "step": 3540 + }, + { + "epoch": 0.9118717197411211, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8517548454688318, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35910865664482117, + "eval_runtime": 17.5619, + "eval_samples_per_second": 2.505, + "eval_steps_per_second": 0.171, + "step": 3540 + }, + { + "epoch": 0.912129310622404, + "grad_norm": 0.17358589157100177, + "learning_rate": 2.3331597391757442e-06, + "loss": 0.2874, + "step": 3541 + }, + { + "epoch": 0.9123869015036867, + "grad_norm": 0.17014683888786977, + "learning_rate": 2.3196022304489864e-06, + "loss": 0.3243, + "step": 3542 + }, + { + "epoch": 0.9126444923849696, + "grad_norm": 0.219578585265958, + "learning_rate": 2.306083291080641e-06, + "loss": 0.4471, + "step": 3543 + }, + { + "epoch": 0.9129020832662523, + "grad_norm": 0.13052088921236898, + "learning_rate": 2.292602932006377e-06, + "loss": 0.3245, + "step": 3544 + }, + { + "epoch": 0.9131596741475352, + "grad_norm": 0.1485674479766076, + "learning_rate": 2.2791611641306555e-06, + "loss": 0.305, + "step": 3545 + }, + { + "epoch": 0.9131596741475352, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8533263488737558, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3580433130264282, + "eval_runtime": 17.4401, + "eval_samples_per_second": 2.523, + "eval_steps_per_second": 0.172, + "step": 3545 + }, + { + "epoch": 0.9134172650288179, + "grad_norm": 0.28276776229349954, + "learning_rate": 2.2657579983267064e-06, + "loss": 0.4108, + "step": 3546 + }, + { + "epoch": 0.9136748559101008, + "grad_norm": 0.15989949777721657, + "learning_rate": 2.252393445436546e-06, + "loss": 0.429, + "step": 3547 + }, + { + "epoch": 0.9139324467913836, + "grad_norm": 0.16830071915691178, + "learning_rate": 2.239067516270954e-06, + "loss": 0.3874, + "step": 3548 + }, + { + "epoch": 0.9141900376726664, + "grad_norm": 0.14365545128953014, + "learning_rate": 2.2257802216094746e-06, + "loss": 0.3974, + "step": 3549 + }, + { + "epoch": 0.9144476285539492, + "grad_norm": 0.10287303681402393, + "learning_rate": 2.212531572200377e-06, + "loss": 0.298, + "step": 3550 + }, + { + "epoch": 0.9144476285539492, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8509690937663698, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3582652807235718, + "eval_runtime": 16.8653, + "eval_samples_per_second": 2.609, + "eval_steps_per_second": 0.178, + "step": 3550 + }, + { + "epoch": 0.914705219435232, + "grad_norm": 0.1414417999527504, + "learning_rate": 2.1993215787606936e-06, + "loss": 0.3516, + "step": 3551 + }, + { + "epoch": 0.9149628103165148, + "grad_norm": 0.2086615373938124, + "learning_rate": 2.186150251976171e-06, + "loss": 0.4494, + "step": 3552 + }, + { + "epoch": 0.9152204011977976, + "grad_norm": 0.15183181236880697, + "learning_rate": 2.1730176025012816e-06, + "loss": 0.3249, + "step": 3553 + }, + { + "epoch": 0.9154779920790804, + "grad_norm": 0.17879774948444485, + "learning_rate": 2.1599236409592327e-06, + "loss": 0.3687, + "step": 3554 + }, + { + "epoch": 0.9157355829603632, + "grad_norm": 0.22149387381725968, + "learning_rate": 2.1468683779418965e-06, + "loss": 0.4342, + "step": 3555 + }, + { + "epoch": 0.9157355829603632, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8499214248297537, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3606622815132141, + "eval_runtime": 17.747, + "eval_samples_per_second": 2.479, + "eval_steps_per_second": 0.169, + "step": 3555 + }, + { + "epoch": 0.915993173841646, + "grad_norm": 0.15702597463200188, + "learning_rate": 2.1338518240098694e-06, + "loss": 0.4099, + "step": 3556 + }, + { + "epoch": 0.9162507647229288, + "grad_norm": 0.15451074192970757, + "learning_rate": 2.1208739896924345e-06, + "loss": 0.3728, + "step": 3557 + }, + { + "epoch": 0.9165083556042116, + "grad_norm": 0.15794402817072664, + "learning_rate": 2.1079348854875505e-06, + "loss": 0.3392, + "step": 3558 + }, + { + "epoch": 0.9167659464854944, + "grad_norm": 0.19678681465612674, + "learning_rate": 2.0950345218618328e-06, + "loss": 0.3692, + "step": 3559 + }, + { + "epoch": 0.9170235373667772, + "grad_norm": 0.1361028667540031, + "learning_rate": 2.082172909250568e-06, + "loss": 0.3296, + "step": 3560 + }, + { + "epoch": 0.9170235373667772, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.858040859088528, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35537996888160706, + "eval_runtime": 16.9677, + "eval_samples_per_second": 2.593, + "eval_steps_per_second": 0.177, + "step": 3560 + }, + { + "epoch": 0.9172811282480601, + "grad_norm": 0.12828516715375252, + "learning_rate": 2.069350058057723e-06, + "loss": 0.284, + "step": 3561 + }, + { + "epoch": 0.9175387191293428, + "grad_norm": 0.15589654522386157, + "learning_rate": 2.056565978655861e-06, + "loss": 0.2953, + "step": 3562 + }, + { + "epoch": 0.9177963100106257, + "grad_norm": 0.1922738703624335, + "learning_rate": 2.043820681386227e-06, + "loss": 0.3538, + "step": 3563 + }, + { + "epoch": 0.9180539008919084, + "grad_norm": 0.20272910116447893, + "learning_rate": 2.0311141765586573e-06, + "loss": 0.4282, + "step": 3564 + }, + { + "epoch": 0.9183114917731913, + "grad_norm": 0.16346346255570265, + "learning_rate": 2.0184464744516353e-06, + "loss": 0.3704, + "step": 3565 + }, + { + "epoch": 0.9183114917731913, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8528025144054479, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35910865664482117, + "eval_runtime": 17.618, + "eval_samples_per_second": 2.497, + "eval_steps_per_second": 0.17, + "step": 3565 + }, + { + "epoch": 0.918569082654474, + "grad_norm": 0.1457485536576306, + "learning_rate": 2.005817585312253e-06, + "loss": 0.334, + "step": 3566 + }, + { + "epoch": 0.9188266735357569, + "grad_norm": 0.15443730771182385, + "learning_rate": 1.993227519356189e-06, + "loss": 0.3371, + "step": 3567 + }, + { + "epoch": 0.9190842644170396, + "grad_norm": 0.1680000043483095, + "learning_rate": 1.9806762867677296e-06, + "loss": 0.4377, + "step": 3568 + }, + { + "epoch": 0.9193418552983225, + "grad_norm": 0.14717374054767854, + "learning_rate": 1.9681638976997486e-06, + "loss": 0.3451, + "step": 3569 + }, + { + "epoch": 0.9195994461796052, + "grad_norm": 0.1242823379036716, + "learning_rate": 1.95569036227371e-06, + "loss": 0.3423, + "step": 3570 + }, + { + "epoch": 0.9195994461796052, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.85620743844945, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3561345934867859, + "eval_runtime": 17.0785, + "eval_samples_per_second": 2.576, + "eval_steps_per_second": 0.176, + "step": 3570 + }, + { + "epoch": 0.9198570370608881, + "grad_norm": 0.1169699351085616, + "learning_rate": 1.943255690579615e-06, + "loss": 0.2595, + "step": 3571 + }, + { + "epoch": 0.9201146279421708, + "grad_norm": 0.11198757782737513, + "learning_rate": 1.9308598926760625e-06, + "loss": 0.3334, + "step": 3572 + }, + { + "epoch": 0.9203722188234537, + "grad_norm": 0.14045385474378944, + "learning_rate": 1.9185029785901867e-06, + "loss": 0.3475, + "step": 3573 + }, + { + "epoch": 0.9206298097047364, + "grad_norm": 0.1654475938787749, + "learning_rate": 1.906184958317664e-06, + "loss": 0.3333, + "step": 3574 + }, + { + "epoch": 0.9208874005860193, + "grad_norm": 0.16327096309656713, + "learning_rate": 1.8939058418227406e-06, + "loss": 0.4313, + "step": 3575 + }, + { + "epoch": 0.9208874005860193, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8548978522786799, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35595703125, + "eval_runtime": 17.2808, + "eval_samples_per_second": 2.546, + "eval_steps_per_second": 0.174, + "step": 3575 + }, + { + "epoch": 0.9211449914673021, + "grad_norm": 0.17906720956662106, + "learning_rate": 1.8816656390381437e-06, + "loss": 0.3375, + "step": 3576 + }, + { + "epoch": 0.9214025823485849, + "grad_norm": 0.17656544233826593, + "learning_rate": 1.8694643598651695e-06, + "loss": 0.3924, + "step": 3577 + }, + { + "epoch": 0.9216601732298677, + "grad_norm": 0.13320038375003498, + "learning_rate": 1.8573020141736008e-06, + "loss": 0.3321, + "step": 3578 + }, + { + "epoch": 0.9219177641111505, + "grad_norm": 0.15680397389075254, + "learning_rate": 1.8451786118017234e-06, + "loss": 0.3439, + "step": 3579 + }, + { + "epoch": 0.9221753549924333, + "grad_norm": 0.1473731597244704, + "learning_rate": 1.8330941625563535e-06, + "loss": 0.4521, + "step": 3580 + }, + { + "epoch": 0.9221753549924333, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8575170246202201, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35458096861839294, + "eval_runtime": 16.9061, + "eval_samples_per_second": 2.603, + "eval_steps_per_second": 0.177, + "step": 3580 + }, + { + "epoch": 0.922432945873716, + "grad_norm": 0.13627264372745382, + "learning_rate": 1.8210486762127499e-06, + "loss": 0.3084, + "step": 3581 + }, + { + "epoch": 0.9226905367549989, + "grad_norm": 0.16310370490610962, + "learning_rate": 1.8090421625147013e-06, + "loss": 0.3037, + "step": 3582 + }, + { + "epoch": 0.9229481276362816, + "grad_norm": 0.14387730487937045, + "learning_rate": 1.7970746311744336e-06, + "loss": 0.4041, + "step": 3583 + }, + { + "epoch": 0.9232057185175645, + "grad_norm": 0.14068108820423889, + "learning_rate": 1.7851460918726748e-06, + "loss": 0.3223, + "step": 3584 + }, + { + "epoch": 0.9234633093988472, + "grad_norm": 0.1302142737116632, + "learning_rate": 1.7732565542585678e-06, + "loss": 0.315, + "step": 3585 + }, + { + "epoch": 0.9234633093988472, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.856993190151912, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35946378111839294, + "eval_runtime": 17.6897, + "eval_samples_per_second": 2.487, + "eval_steps_per_second": 0.17, + "step": 3585 + }, + { + "epoch": 0.9237209002801301, + "grad_norm": 0.1446479054321807, + "learning_rate": 1.7614060279497413e-06, + "loss": 0.3067, + "step": 3586 + }, + { + "epoch": 0.9239784911614128, + "grad_norm": 0.13510928230906516, + "learning_rate": 1.7495945225322607e-06, + "loss": 0.2805, + "step": 3587 + }, + { + "epoch": 0.9242360820426957, + "grad_norm": 0.15398682236678907, + "learning_rate": 1.737822047560611e-06, + "loss": 0.3182, + "step": 3588 + }, + { + "epoch": 0.9244936729239784, + "grad_norm": 0.1598075897613883, + "learning_rate": 1.7260886125577247e-06, + "loss": 0.4169, + "step": 3589 + }, + { + "epoch": 0.9247512638052613, + "grad_norm": 0.15869750034866761, + "learning_rate": 1.7143942270149427e-06, + "loss": 0.3449, + "step": 3590 + }, + { + "epoch": 0.9247512638052613, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8585646935568361, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35830965638160706, + "eval_runtime": 16.3997, + "eval_samples_per_second": 2.683, + "eval_steps_per_second": 0.183, + "step": 3590 + }, + { + "epoch": 0.9250088546865441, + "grad_norm": 0.15858603689819276, + "learning_rate": 1.7027389003920313e-06, + "loss": 0.3533, + "step": 3591 + }, + { + "epoch": 0.9252664455678269, + "grad_norm": 0.15268138246759289, + "learning_rate": 1.6911226421171378e-06, + "loss": 0.3639, + "step": 3592 + }, + { + "epoch": 0.9255240364491097, + "grad_norm": 0.1692437619504843, + "learning_rate": 1.6795454615868234e-06, + "loss": 0.3816, + "step": 3593 + }, + { + "epoch": 0.9257816273303925, + "grad_norm": 0.16953837399894867, + "learning_rate": 1.6680073681660358e-06, + "loss": 0.401, + "step": 3594 + }, + { + "epoch": 0.9260392182116753, + "grad_norm": 0.19482660347644706, + "learning_rate": 1.656508371188109e-06, + "loss": 0.2827, + "step": 3595 + }, + { + "epoch": 0.9260392182116753, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859874279727606, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35520240664482117, + "eval_runtime": 17.2187, + "eval_samples_per_second": 2.555, + "eval_steps_per_second": 0.174, + "step": 3595 + }, + { + "epoch": 0.9262968090929581, + "grad_norm": 0.13849159515883916, + "learning_rate": 1.6450484799547473e-06, + "loss": 0.4034, + "step": 3596 + }, + { + "epoch": 0.9265543999742409, + "grad_norm": 0.17046521701031161, + "learning_rate": 1.6336277037360127e-06, + "loss": 0.3568, + "step": 3597 + }, + { + "epoch": 0.9268119908555237, + "grad_norm": 0.17147074585372724, + "learning_rate": 1.622246051770343e-06, + "loss": 0.4252, + "step": 3598 + }, + { + "epoch": 0.9270695817368065, + "grad_norm": 0.17687896763490749, + "learning_rate": 1.6109035332645183e-06, + "loss": 0.3575, + "step": 3599 + }, + { + "epoch": 0.9273271726180893, + "grad_norm": 0.1830855633473594, + "learning_rate": 1.5996001573936604e-06, + "loss": 0.3733, + "step": 3600 + }, + { + "epoch": 0.9273271726180893, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8593504452592982, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3565784692764282, + "eval_runtime": 16.8833, + "eval_samples_per_second": 2.606, + "eval_steps_per_second": 0.178, + "step": 3600 + }, + { + "epoch": 0.9275847634993721, + "grad_norm": 0.17321490179978577, + "learning_rate": 1.5883359333012438e-06, + "loss": 0.3897, + "step": 3601 + }, + { + "epoch": 0.9278423543806549, + "grad_norm": 0.15898382741093173, + "learning_rate": 1.5771108700990412e-06, + "loss": 0.3738, + "step": 3602 + }, + { + "epoch": 0.9280999452619377, + "grad_norm": 0.1391967307095472, + "learning_rate": 1.5659249768671835e-06, + "loss": 0.3337, + "step": 3603 + }, + { + "epoch": 0.9283575361432206, + "grad_norm": 0.12481251556632249, + "learning_rate": 1.5547782626540941e-06, + "loss": 0.3264, + "step": 3604 + }, + { + "epoch": 0.9286151270245033, + "grad_norm": 0.1582670706657496, + "learning_rate": 1.5436707364765213e-06, + "loss": 0.3687, + "step": 3605 + }, + { + "epoch": 0.9286151270245033, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8606600314300681, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3534712493419647, + "eval_runtime": 17.7365, + "eval_samples_per_second": 2.481, + "eval_steps_per_second": 0.169, + "step": 3605 + }, + { + "epoch": 0.9288727179057862, + "grad_norm": 0.1962104659388427, + "learning_rate": 1.5326024073194834e-06, + "loss": 0.4026, + "step": 3606 + }, + { + "epoch": 0.9291303087870689, + "grad_norm": 0.13502688190956327, + "learning_rate": 1.5215732841363239e-06, + "loss": 0.2946, + "step": 3607 + }, + { + "epoch": 0.9293878996683518, + "grad_norm": 0.11718572355068893, + "learning_rate": 1.5105833758486621e-06, + "loss": 0.298, + "step": 3608 + }, + { + "epoch": 0.9296454905496345, + "grad_norm": 0.16677425259863735, + "learning_rate": 1.4996326913463754e-06, + "loss": 0.2994, + "step": 3609 + }, + { + "epoch": 0.9299030814309174, + "grad_norm": 0.15552722462552468, + "learning_rate": 1.4887212394876503e-06, + "loss": 0.3641, + "step": 3610 + }, + { + "epoch": 0.9299030814309174, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8593504452592982, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3546253442764282, + "eval_runtime": 17.1911, + "eval_samples_per_second": 2.559, + "eval_steps_per_second": 0.175, + "step": 3610 + }, + { + "epoch": 0.9301606723122001, + "grad_norm": 0.1307387602339093, + "learning_rate": 1.4778490290988934e-06, + "loss": 0.3407, + "step": 3611 + }, + { + "epoch": 0.930418263193483, + "grad_norm": 0.2251240226399944, + "learning_rate": 1.467016068974819e-06, + "loss": 0.4252, + "step": 3612 + }, + { + "epoch": 0.9306758540747657, + "grad_norm": 0.1225706200048114, + "learning_rate": 1.4562223678783516e-06, + "loss": 0.2744, + "step": 3613 + }, + { + "epoch": 0.9309334449560486, + "grad_norm": 0.14564700385711848, + "learning_rate": 1.4454679345406619e-06, + "loss": 0.3934, + "step": 3614 + }, + { + "epoch": 0.9311910358373313, + "grad_norm": 0.16876178194128658, + "learning_rate": 1.4347527776611746e-06, + "loss": 0.3533, + "step": 3615 + }, + { + "epoch": 0.9311910358373313, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859350445259298, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3512073755264282, + "eval_runtime": 17.5668, + "eval_samples_per_second": 2.505, + "eval_steps_per_second": 0.171, + "step": 3615 + }, + { + "epoch": 0.9314486267186142, + "grad_norm": 0.1525862990991277, + "learning_rate": 1.4240769059075342e-06, + "loss": 0.2566, + "step": 3616 + }, + { + "epoch": 0.9317062175998969, + "grad_norm": 0.14940505072760127, + "learning_rate": 1.4134403279156106e-06, + "loss": 0.2961, + "step": 3617 + }, + { + "epoch": 0.9319638084811798, + "grad_norm": 0.15374299685837137, + "learning_rate": 1.4028430522894765e-06, + "loss": 0.3751, + "step": 3618 + }, + { + "epoch": 0.9322213993624626, + "grad_norm": 0.1341987628806026, + "learning_rate": 1.3922850876014192e-06, + "loss": 0.343, + "step": 3619 + }, + { + "epoch": 0.9324789902437454, + "grad_norm": 0.13654672843056637, + "learning_rate": 1.3817664423919351e-06, + "loss": 0.3156, + "step": 3620 + }, + { + "epoch": 0.9324789902437454, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8588266107909901, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.353515625, + "eval_runtime": 17.5501, + "eval_samples_per_second": 2.507, + "eval_steps_per_second": 0.171, + "step": 3620 + }, + { + "epoch": 0.9327365811250282, + "grad_norm": 0.16672504814855243, + "learning_rate": 1.3712871251697012e-06, + "loss": 0.3681, + "step": 3621 + }, + { + "epoch": 0.932994172006311, + "grad_norm": 0.19594028408110487, + "learning_rate": 1.3608471444115977e-06, + "loss": 0.3699, + "step": 3622 + }, + { + "epoch": 0.9332517628875938, + "grad_norm": 0.17086512939183604, + "learning_rate": 1.3504465085626638e-06, + "loss": 0.4609, + "step": 3623 + }, + { + "epoch": 0.9335093537688766, + "grad_norm": 0.17268770687487686, + "learning_rate": 1.3400852260361252e-06, + "loss": 0.3939, + "step": 3624 + }, + { + "epoch": 0.9337669446501594, + "grad_norm": 0.14195089254333754, + "learning_rate": 1.3297633052133718e-06, + "loss": 0.3224, + "step": 3625 + }, + { + "epoch": 0.9337669446501594, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8643268727082242, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3525390625, + "eval_runtime": 17.1593, + "eval_samples_per_second": 2.564, + "eval_steps_per_second": 0.175, + "step": 3625 + }, + { + "epoch": 0.9340245355314422, + "grad_norm": 0.15892233470683617, + "learning_rate": 1.3194807544439635e-06, + "loss": 0.373, + "step": 3626 + }, + { + "epoch": 0.934282126412725, + "grad_norm": 0.13537545563985226, + "learning_rate": 1.3092375820455916e-06, + "loss": 0.3124, + "step": 3627 + }, + { + "epoch": 0.9345397172940078, + "grad_norm": 0.15225779088304373, + "learning_rate": 1.299033796304111e-06, + "loss": 0.382, + "step": 3628 + }, + { + "epoch": 0.9347973081752906, + "grad_norm": 0.16354081536121157, + "learning_rate": 1.2888694054735085e-06, + "loss": 0.3384, + "step": 3629 + }, + { + "epoch": 0.9350548990565734, + "grad_norm": 0.15028260915943015, + "learning_rate": 1.2787444177759068e-06, + "loss": 0.3891, + "step": 3630 + }, + { + "epoch": 0.9350548990565734, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8795180722891566, + "eval_PRM F1 AUC": 0.7223677317967523, + "eval_PRM F1 AUC (fixed)": 0.8596123624934522, + "eval_PRM F1 Neg": 0.5652173913043478, + "eval_PRM NPV": 0.5652173913043478, + "eval_PRM Precision": 0.8795180722891566, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.5652173913043478, + "eval_loss": 0.3546253442764282, + "eval_runtime": 17.0396, + "eval_samples_per_second": 2.582, + "eval_steps_per_second": 0.176, + "step": 3630 + }, + { + "epoch": 0.9353124899378562, + "grad_norm": 0.15976214590838345, + "learning_rate": 1.2686588414015543e-06, + "loss": 0.3122, + "step": 3631 + }, + { + "epoch": 0.935570080819139, + "grad_norm": 0.13884197323278308, + "learning_rate": 1.2586126845088086e-06, + "loss": 0.3325, + "step": 3632 + }, + { + "epoch": 0.9358276717004218, + "grad_norm": 0.15873924916917725, + "learning_rate": 1.248605955224169e-06, + "loss": 0.3719, + "step": 3633 + }, + { + "epoch": 0.9360852625817047, + "grad_norm": 0.17844417291816023, + "learning_rate": 1.2386386616422164e-06, + "loss": 0.3619, + "step": 3634 + }, + { + "epoch": 0.9363428534629874, + "grad_norm": 0.1429127723093715, + "learning_rate": 1.228710811825623e-06, + "loss": 0.3363, + "step": 3635 + }, + { + "epoch": 0.9363428534629874, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8577789418543739, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3564453125, + "eval_runtime": 17.1988, + "eval_samples_per_second": 2.558, + "eval_steps_per_second": 0.174, + "step": 3635 + }, + { + "epoch": 0.9366004443442703, + "grad_norm": 0.143553936055149, + "learning_rate": 1.2188224138051874e-06, + "loss": 0.3573, + "step": 3636 + }, + { + "epoch": 0.936858035225553, + "grad_norm": 0.1570089595566499, + "learning_rate": 1.208973475579761e-06, + "loss": 0.3635, + "step": 3637 + }, + { + "epoch": 0.9371156261068359, + "grad_norm": 0.1636449864279838, + "learning_rate": 1.19916400511631e-06, + "loss": 0.4452, + "step": 3638 + }, + { + "epoch": 0.9373732169881186, + "grad_norm": 0.1434591093458333, + "learning_rate": 1.189394010349848e-06, + "loss": 0.3512, + "step": 3639 + }, + { + "epoch": 0.9376308078694015, + "grad_norm": 0.1845146332799933, + "learning_rate": 1.1796634991834476e-06, + "loss": 0.3987, + "step": 3640 + }, + { + "epoch": 0.9376308078694015, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8609219486642221, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3558238744735718, + "eval_runtime": 17.0517, + "eval_samples_per_second": 2.58, + "eval_steps_per_second": 0.176, + "step": 3640 + }, + { + "epoch": 0.9378883987506842, + "grad_norm": 0.1850052147374829, + "learning_rate": 1.1699724794882849e-06, + "loss": 0.4093, + "step": 3641 + }, + { + "epoch": 0.938145989631967, + "grad_norm": 0.140018455666938, + "learning_rate": 1.1603209591035447e-06, + "loss": 0.3434, + "step": 3642 + }, + { + "epoch": 0.9384035805132498, + "grad_norm": 0.19723646319820293, + "learning_rate": 1.1507089458364984e-06, + "loss": 0.3912, + "step": 3643 + }, + { + "epoch": 0.9386611713945326, + "grad_norm": 0.13679557778401036, + "learning_rate": 1.1411364474624264e-06, + "loss": 0.2239, + "step": 3644 + }, + { + "epoch": 0.9389187622758154, + "grad_norm": 0.11434361612107999, + "learning_rate": 1.1316034717246626e-06, + "loss": 0.3308, + "step": 3645 + }, + { + "epoch": 0.9389187622758154, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8624934520691461, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35205078125, + "eval_runtime": 17.0238, + "eval_samples_per_second": 2.585, + "eval_steps_per_second": 0.176, + "step": 3645 + }, + { + "epoch": 0.9391763531570982, + "grad_norm": 0.13629942244093662, + "learning_rate": 1.1221100263345773e-06, + "loss": 0.3062, + "step": 3646 + }, + { + "epoch": 0.939433944038381, + "grad_norm": 0.19287017509156754, + "learning_rate": 1.1126561189715502e-06, + "loss": 0.3842, + "step": 3647 + }, + { + "epoch": 0.9396915349196638, + "grad_norm": 0.13998401829250906, + "learning_rate": 1.1032417572829745e-06, + "loss": 0.3509, + "step": 3648 + }, + { + "epoch": 0.9399491258009467, + "grad_norm": 0.17528116018871204, + "learning_rate": 1.0938669488842702e-06, + "loss": 0.4325, + "step": 3649 + }, + { + "epoch": 0.9402067166822294, + "grad_norm": 0.15921787864544712, + "learning_rate": 1.0845317013588596e-06, + "loss": 0.3764, + "step": 3650 + }, + { + "epoch": 0.9402067166822294, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.859874279727606, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35324928164482117, + "eval_runtime": 17.04, + "eval_samples_per_second": 2.582, + "eval_steps_per_second": 0.176, + "step": 3650 + }, + { + "epoch": 0.9404643075635123, + "grad_norm": 0.193952286619382, + "learning_rate": 1.075236022258147e-06, + "loss": 0.3405, + "step": 3651 + }, + { + "epoch": 0.940721898444795, + "grad_norm": 0.26518998616866324, + "learning_rate": 1.0659799191015507e-06, + "loss": 0.4643, + "step": 3652 + }, + { + "epoch": 0.9409794893260779, + "grad_norm": 0.13543714158708117, + "learning_rate": 1.0567633993764537e-06, + "loss": 0.273, + "step": 3653 + }, + { + "epoch": 0.9412370802073606, + "grad_norm": 0.13373048613174426, + "learning_rate": 1.0475864705382422e-06, + "loss": 0.3098, + "step": 3654 + }, + { + "epoch": 0.9414946710886435, + "grad_norm": 0.15415902218239402, + "learning_rate": 1.0384491400102614e-06, + "loss": 0.3667, + "step": 3655 + }, + { + "epoch": 0.9414946710886435, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8609219486642221, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3531605005264282, + "eval_runtime": 16.5343, + "eval_samples_per_second": 2.661, + "eval_steps_per_second": 0.181, + "step": 3655 + }, + { + "epoch": 0.9417522619699262, + "grad_norm": 0.16902528190442034, + "learning_rate": 1.0293514151838268e-06, + "loss": 0.4194, + "step": 3656 + }, + { + "epoch": 0.9420098528512091, + "grad_norm": 0.1837710083377154, + "learning_rate": 1.0202933034182237e-06, + "loss": 0.3673, + "step": 3657 + }, + { + "epoch": 0.9422674437324918, + "grad_norm": 0.155257115669239, + "learning_rate": 1.0112748120406856e-06, + "loss": 0.3239, + "step": 3658 + }, + { + "epoch": 0.9425250346137747, + "grad_norm": 0.1603155333470742, + "learning_rate": 1.0022959483464045e-06, + "loss": 0.3068, + "step": 3659 + }, + { + "epoch": 0.9427826254950574, + "grad_norm": 0.19221021847619366, + "learning_rate": 9.933567195985094e-07, + "loss": 0.4736, + "step": 3660 + }, + { + "epoch": 0.9427826254950574, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8619696176008381, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3556463122367859, + "eval_runtime": 17.1711, + "eval_samples_per_second": 2.562, + "eval_steps_per_second": 0.175, + "step": 3660 + }, + { + "epoch": 0.9430402163763403, + "grad_norm": 0.16379670288583872, + "learning_rate": 9.844571330280549e-07, + "loss": 0.4013, + "step": 3661 + }, + { + "epoch": 0.9432978072576231, + "grad_norm": 0.17665040568388132, + "learning_rate": 9.75597195834077e-07, + "loss": 0.3604, + "step": 3662 + }, + { + "epoch": 0.9435553981389059, + "grad_norm": 0.1694285296370909, + "learning_rate": 9.66776915183476e-07, + "loss": 0.3601, + "step": 3663 + }, + { + "epoch": 0.9438129890201887, + "grad_norm": 0.12650901480029578, + "learning_rate": 9.579962982111224e-07, + "loss": 0.3239, + "step": 3664 + }, + { + "epoch": 0.9440705799014715, + "grad_norm": 0.14756402561710022, + "learning_rate": 9.492553520197733e-07, + "loss": 0.4006, + "step": 3665 + }, + { + "epoch": 0.9440705799014715, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8609219486642221, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3529829680919647, + "eval_runtime": 17.0353, + "eval_samples_per_second": 2.583, + "eval_steps_per_second": 0.176, + "step": 3665 + }, + { + "epoch": 0.9443281707827543, + "grad_norm": 0.17854362312944283, + "learning_rate": 9.405540836801119e-07, + "loss": 0.3465, + "step": 3666 + }, + { + "epoch": 0.9445857616640371, + "grad_norm": 0.17709937002340748, + "learning_rate": 9.318925002307188e-07, + "loss": 0.3149, + "step": 3667 + }, + { + "epoch": 0.9448433525453199, + "grad_norm": 0.172973032323487, + "learning_rate": 9.232706086780619e-07, + "loss": 0.3452, + "step": 3668 + }, + { + "epoch": 0.9451009434266027, + "grad_norm": 0.1345121785044483, + "learning_rate": 9.146884159965286e-07, + "loss": 0.2891, + "step": 3669 + }, + { + "epoch": 0.9453585343078855, + "grad_norm": 0.13728174842331684, + "learning_rate": 9.061459291283658e-07, + "loss": 0.3148, + "step": 3670 + }, + { + "epoch": 0.9453585343078855, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.858040859088528, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3525390625, + "eval_runtime": 16.9543, + "eval_samples_per_second": 2.595, + "eval_steps_per_second": 0.177, + "step": 3670 + }, + { + "epoch": 0.9456161251891683, + "grad_norm": 0.12179028171270745, + "learning_rate": 8.976431549837239e-07, + "loss": 0.2727, + "step": 3671 + }, + { + "epoch": 0.9458737160704511, + "grad_norm": 0.20292642131673685, + "learning_rate": 8.891801004406119e-07, + "loss": 0.4476, + "step": 3672 + }, + { + "epoch": 0.9461313069517339, + "grad_norm": 0.2117910852732907, + "learning_rate": 8.807567723449206e-07, + "loss": 0.4602, + "step": 3673 + }, + { + "epoch": 0.9463888978330167, + "grad_norm": 0.17428962139928333, + "learning_rate": 8.723731775103938e-07, + "loss": 0.4595, + "step": 3674 + }, + { + "epoch": 0.9466464887142995, + "grad_norm": 0.2090642216310755, + "learning_rate": 8.640293227186513e-07, + "loss": 0.4209, + "step": 3675 + }, + { + "epoch": 0.9466464887142995, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859874279727606, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35422584414482117, + "eval_runtime": 16.9944, + "eval_samples_per_second": 2.589, + "eval_steps_per_second": 0.177, + "step": 3675 + }, + { + "epoch": 0.9469040795955823, + "grad_norm": 0.1414186990108163, + "learning_rate": 8.557252147191497e-07, + "loss": 0.3868, + "step": 3676 + }, + { + "epoch": 0.9471616704768652, + "grad_norm": 0.14781947639985626, + "learning_rate": 8.474608602292044e-07, + "loss": 0.2981, + "step": 3677 + }, + { + "epoch": 0.9474192613581479, + "grad_norm": 0.14013584779347527, + "learning_rate": 8.392362659339681e-07, + "loss": 0.3697, + "step": 3678 + }, + { + "epoch": 0.9476768522394308, + "grad_norm": 0.15392211696114294, + "learning_rate": 8.31051438486441e-07, + "loss": 0.3278, + "step": 3679 + }, + { + "epoch": 0.9479344431207135, + "grad_norm": 0.2299225601108455, + "learning_rate": 8.22906384507438e-07, + "loss": 0.422, + "step": 3680 + }, + { + "epoch": 0.9479344431207135, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8548978522786799, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3544478118419647, + "eval_runtime": 17.2464, + "eval_samples_per_second": 2.551, + "eval_steps_per_second": 0.174, + "step": 3680 + }, + { + "epoch": 0.9481920340019964, + "grad_norm": 0.16274285836944163, + "learning_rate": 8.148011105856168e-07, + "loss": 0.4064, + "step": 3681 + }, + { + "epoch": 0.9484496248832791, + "grad_norm": 0.20910361953817333, + "learning_rate": 8.067356232774437e-07, + "loss": 0.473, + "step": 3682 + }, + { + "epoch": 0.948707215764562, + "grad_norm": 0.17748660504950942, + "learning_rate": 7.987099291072109e-07, + "loss": 0.3576, + "step": 3683 + }, + { + "epoch": 0.9489648066458447, + "grad_norm": 0.1578814384448594, + "learning_rate": 7.907240345670142e-07, + "loss": 0.3753, + "step": 3684 + }, + { + "epoch": 0.9492223975271276, + "grad_norm": 0.15212521856275274, + "learning_rate": 7.827779461167695e-07, + "loss": 0.3193, + "step": 3685 + }, + { + "epoch": 0.9492223975271276, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.861707700366684, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3528497815132141, + "eval_runtime": 17.6253, + "eval_samples_per_second": 2.496, + "eval_steps_per_second": 0.17, + "step": 3685 + }, + { + "epoch": 0.9494799884084103, + "grad_norm": 0.18749221830312565, + "learning_rate": 7.748716701841685e-07, + "loss": 0.3724, + "step": 3686 + }, + { + "epoch": 0.9497375792896932, + "grad_norm": 0.1891080708391055, + "learning_rate": 7.670052131647121e-07, + "loss": 0.389, + "step": 3687 + }, + { + "epoch": 0.9499951701709759, + "grad_norm": 0.21227202340748516, + "learning_rate": 7.591785814217046e-07, + "loss": 0.3401, + "step": 3688 + }, + { + "epoch": 0.9502527610522588, + "grad_norm": 0.1648754080971513, + "learning_rate": 7.513917812862037e-07, + "loss": 0.3453, + "step": 3689 + }, + { + "epoch": 0.9505103519335415, + "grad_norm": 0.19280860950145984, + "learning_rate": 7.436448190570766e-07, + "loss": 0.3472, + "step": 3690 + }, + { + "epoch": 0.9505103519335415, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859874279727606, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3533824682235718, + "eval_runtime": 17.2241, + "eval_samples_per_second": 2.555, + "eval_steps_per_second": 0.174, + "step": 3690 + }, + { + "epoch": 0.9507679428148244, + "grad_norm": 0.14333195087175796, + "learning_rate": 7.359377010009383e-07, + "loss": 0.2865, + "step": 3691 + }, + { + "epoch": 0.9510255336961072, + "grad_norm": 0.18370111816292833, + "learning_rate": 7.282704333522017e-07, + "loss": 0.3927, + "step": 3692 + }, + { + "epoch": 0.95128312457739, + "grad_norm": 0.1728863568975308, + "learning_rate": 7.206430223130278e-07, + "loss": 0.3012, + "step": 3693 + }, + { + "epoch": 0.9515407154586728, + "grad_norm": 0.15169059013242978, + "learning_rate": 7.130554740533313e-07, + "loss": 0.3035, + "step": 3694 + }, + { + "epoch": 0.9517983063399555, + "grad_norm": 0.13965884246701635, + "learning_rate": 7.055077947107858e-07, + "loss": 0.3257, + "step": 3695 + }, + { + "epoch": 0.9517983063399555, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859874279727606, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35205078125, + "eval_runtime": 16.9261, + "eval_samples_per_second": 2.6, + "eval_steps_per_second": 0.177, + "step": 3695 + }, + { + "epoch": 0.9520558972212384, + "grad_norm": 0.15234151942918903, + "learning_rate": 6.979999903908297e-07, + "loss": 0.2756, + "step": 3696 + }, + { + "epoch": 0.9523134881025211, + "grad_norm": 0.17011987219811123, + "learning_rate": 6.905320671666326e-07, + "loss": 0.3531, + "step": 3697 + }, + { + "epoch": 0.952571078983804, + "grad_norm": 0.20705912072551072, + "learning_rate": 6.831040310790959e-07, + "loss": 0.4757, + "step": 3698 + }, + { + "epoch": 0.9528286698650867, + "grad_norm": 0.12767561963277557, + "learning_rate": 6.757158881368686e-07, + "loss": 0.3342, + "step": 3699 + }, + { + "epoch": 0.9530862607463696, + "grad_norm": 0.17796177316584283, + "learning_rate": 6.683676443163311e-07, + "loss": 0.3029, + "step": 3700 + }, + { + "epoch": 0.9530862607463696, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8622315348349922, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3525390625, + "eval_runtime": 17.2548, + "eval_samples_per_second": 2.55, + "eval_steps_per_second": 0.174, + "step": 3700 + }, + { + "epoch": 0.9533438516276523, + "grad_norm": 0.1310624988332114, + "learning_rate": 6.610593055615733e-07, + "loss": 0.3261, + "step": 3701 + }, + { + "epoch": 0.9536014425089352, + "grad_norm": 0.13505877689353882, + "learning_rate": 6.537908777844215e-07, + "loss": 0.3194, + "step": 3702 + }, + { + "epoch": 0.9538590333902179, + "grad_norm": 0.14249633091361077, + "learning_rate": 6.465623668644117e-07, + "loss": 0.3034, + "step": 3703 + }, + { + "epoch": 0.9541166242715008, + "grad_norm": 0.14268727301342632, + "learning_rate": 6.393737786487885e-07, + "loss": 0.3654, + "step": 3704 + }, + { + "epoch": 0.9543742151527836, + "grad_norm": 0.1564181616551385, + "learning_rate": 6.322251189525008e-07, + "loss": 0.3605, + "step": 3705 + }, + { + "epoch": 0.9543742151527836, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8590885280251441, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3514293432235718, + "eval_runtime": 17.4001, + "eval_samples_per_second": 2.529, + "eval_steps_per_second": 0.172, + "step": 3705 + }, + { + "epoch": 0.9546318060340664, + "grad_norm": 0.16752144840241906, + "learning_rate": 6.251163935582116e-07, + "loss": 0.3424, + "step": 3706 + }, + { + "epoch": 0.9548893969153492, + "grad_norm": 0.1731135089318462, + "learning_rate": 6.180476082162656e-07, + "loss": 0.3848, + "step": 3707 + }, + { + "epoch": 0.955146987796632, + "grad_norm": 0.15070275416486573, + "learning_rate": 6.110187686447e-07, + "loss": 0.3657, + "step": 3708 + }, + { + "epoch": 0.9554045786779148, + "grad_norm": 0.18480108877326346, + "learning_rate": 6.040298805292499e-07, + "loss": 0.3594, + "step": 3709 + }, + { + "epoch": 0.9556621695591976, + "grad_norm": 0.1451269599547431, + "learning_rate": 5.970809495233265e-07, + "loss": 0.3382, + "step": 3710 + }, + { + "epoch": 0.9556621695591976, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8588266107909901, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3533380627632141, + "eval_runtime": 17.1366, + "eval_samples_per_second": 2.568, + "eval_steps_per_second": 0.175, + "step": 3710 + }, + { + "epoch": 0.9559197604404804, + "grad_norm": 0.204747373910136, + "learning_rate": 5.901719812480166e-07, + "loss": 0.4478, + "step": 3711 + }, + { + "epoch": 0.9561773513217632, + "grad_norm": 0.12067191724353114, + "learning_rate": 5.833029812920831e-07, + "loss": 0.3148, + "step": 3712 + }, + { + "epoch": 0.956434942203046, + "grad_norm": 0.15870357656342368, + "learning_rate": 5.764739552119702e-07, + "loss": 0.4068, + "step": 3713 + }, + { + "epoch": 0.9566925330843288, + "grad_norm": 0.1973206592364747, + "learning_rate": 5.696849085317646e-07, + "loss": 0.3478, + "step": 3714 + }, + { + "epoch": 0.9569501239656116, + "grad_norm": 0.2018788475412071, + "learning_rate": 5.629358467432289e-07, + "loss": 0.4264, + "step": 3715 + }, + { + "epoch": 0.9569501239656116, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.86013619696176, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3541370630264282, + "eval_runtime": 17.3697, + "eval_samples_per_second": 2.533, + "eval_steps_per_second": 0.173, + "step": 3715 + }, + { + "epoch": 0.9572077148468944, + "grad_norm": 0.15152106035513985, + "learning_rate": 5.562267753057626e-07, + "loss": 0.3867, + "step": 3716 + }, + { + "epoch": 0.9574653057281772, + "grad_norm": 0.13845243247216737, + "learning_rate": 5.495576996464468e-07, + "loss": 0.3161, + "step": 3717 + }, + { + "epoch": 0.95772289660946, + "grad_norm": 0.13210334465496898, + "learning_rate": 5.429286251599885e-07, + "loss": 0.3148, + "step": 3718 + }, + { + "epoch": 0.9579804874907428, + "grad_norm": 0.1571066170842066, + "learning_rate": 5.363395572087371e-07, + "loss": 0.3541, + "step": 3719 + }, + { + "epoch": 0.9582380783720257, + "grad_norm": 0.15295813994550886, + "learning_rate": 5.297905011226845e-07, + "loss": 0.4044, + "step": 3720 + }, + { + "epoch": 0.9582380783720257, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.86013619696176, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3545365631580353, + "eval_runtime": 17.1569, + "eval_samples_per_second": 2.565, + "eval_steps_per_second": 0.175, + "step": 3720 + }, + { + "epoch": 0.9584956692533084, + "grad_norm": 0.19608383974821886, + "learning_rate": 5.232814621994598e-07, + "loss": 0.4614, + "step": 3721 + }, + { + "epoch": 0.9587532601345913, + "grad_norm": 0.16729845368017604, + "learning_rate": 5.168124457043178e-07, + "loss": 0.3416, + "step": 3722 + }, + { + "epoch": 0.959010851015874, + "grad_norm": 0.1385253217089452, + "learning_rate": 5.103834568701338e-07, + "loss": 0.3307, + "step": 3723 + }, + { + "epoch": 0.9592684418971569, + "grad_norm": 0.13269673976828575, + "learning_rate": 5.039945008974145e-07, + "loss": 0.3276, + "step": 3724 + }, + { + "epoch": 0.9595260327784396, + "grad_norm": 0.1505006388364205, + "learning_rate": 4.976455829542704e-07, + "loss": 0.3554, + "step": 3725 + }, + { + "epoch": 0.9595260327784396, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8606600314300681, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3522283434867859, + "eval_runtime": 17.3805, + "eval_samples_per_second": 2.532, + "eval_steps_per_second": 0.173, + "step": 3725 + }, + { + "epoch": 0.9597836236597225, + "grad_norm": 0.15422220137904377, + "learning_rate": 4.91336708176443e-07, + "loss": 0.307, + "step": 3726 + }, + { + "epoch": 0.9600412145410052, + "grad_norm": 0.1948051781206229, + "learning_rate": 4.850678816672671e-07, + "loss": 0.3772, + "step": 3727 + }, + { + "epoch": 0.9602988054222881, + "grad_norm": 0.11942577613308271, + "learning_rate": 4.788391084976862e-07, + "loss": 0.3148, + "step": 3728 + }, + { + "epoch": 0.9605563963035708, + "grad_norm": 0.16581696493096246, + "learning_rate": 4.726503937062421e-07, + "loss": 0.3584, + "step": 3729 + }, + { + "epoch": 0.9608139871848537, + "grad_norm": 0.18270653336601703, + "learning_rate": 4.6650174229908026e-07, + "loss": 0.3931, + "step": 3730 + }, + { + "epoch": 0.9608139871848537, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859350445259298, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35400390625, + "eval_runtime": 17.6109, + "eval_samples_per_second": 2.498, + "eval_steps_per_second": 0.17, + "step": 3730 + }, + { + "epoch": 0.9610715780661364, + "grad_norm": 0.1656800706537147, + "learning_rate": 4.6039315924992774e-07, + "loss": 0.337, + "step": 3731 + }, + { + "epoch": 0.9613291689474193, + "grad_norm": 0.15687605108049824, + "learning_rate": 4.543246495001097e-07, + "loss": 0.3087, + "step": 3732 + }, + { + "epoch": 0.961586759828702, + "grad_norm": 0.41979890458747227, + "learning_rate": 4.4829621795852705e-07, + "loss": 0.4024, + "step": 3733 + }, + { + "epoch": 0.9618443507099849, + "grad_norm": 0.18933485246564027, + "learning_rate": 4.42307869501668e-07, + "loss": 0.4332, + "step": 3734 + }, + { + "epoch": 0.9621019415912677, + "grad_norm": 0.1369765834241001, + "learning_rate": 4.363596089735911e-07, + "loss": 0.3336, + "step": 3735 + }, + { + "epoch": 0.9621019415912677, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.858040859088528, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35520240664482117, + "eval_runtime": 16.9809, + "eval_samples_per_second": 2.591, + "eval_steps_per_second": 0.177, + "step": 3735 + }, + { + "epoch": 0.9623595324725505, + "grad_norm": 0.1743975417300076, + "learning_rate": 4.304514411859306e-07, + "loss": 0.4265, + "step": 3736 + }, + { + "epoch": 0.9626171233538333, + "grad_norm": 0.18163224348449938, + "learning_rate": 4.2458337091788593e-07, + "loss": 0.4083, + "step": 3737 + }, + { + "epoch": 0.9628747142351161, + "grad_norm": 0.15285926097764635, + "learning_rate": 4.1875540291622106e-07, + "loss": 0.3955, + "step": 3738 + }, + { + "epoch": 0.9631323051163989, + "grad_norm": 0.16429583098588607, + "learning_rate": 4.129675418952761e-07, + "loss": 0.3556, + "step": 3739 + }, + { + "epoch": 0.9633898959976817, + "grad_norm": 0.22238495157892732, + "learning_rate": 4.0721979253692254e-07, + "loss": 0.4598, + "step": 3740 + }, + { + "epoch": 0.9633898959976817, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8588266107909901, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35422584414482117, + "eval_runtime": 16.9862, + "eval_samples_per_second": 2.59, + "eval_steps_per_second": 0.177, + "step": 3740 + }, + { + "epoch": 0.9636474868789645, + "grad_norm": 0.17674656977462916, + "learning_rate": 4.015121594906024e-07, + "loss": 0.3965, + "step": 3741 + }, + { + "epoch": 0.9639050777602473, + "grad_norm": 0.16840117530400994, + "learning_rate": 3.958446473733002e-07, + "loss": 0.4132, + "step": 3742 + }, + { + "epoch": 0.9641626686415301, + "grad_norm": 0.13867074302418367, + "learning_rate": 3.9021726076954867e-07, + "loss": 0.3525, + "step": 3743 + }, + { + "epoch": 0.9644202595228129, + "grad_norm": 0.1459626178307732, + "learning_rate": 3.8463000423142326e-07, + "loss": 0.3487, + "step": 3744 + }, + { + "epoch": 0.9646778504040957, + "grad_norm": 0.15807260960562433, + "learning_rate": 3.7908288227853085e-07, + "loss": 0.3266, + "step": 3745 + }, + { + "epoch": 0.9646778504040957, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.859088528025144, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35506924986839294, + "eval_runtime": 17.1534, + "eval_samples_per_second": 2.565, + "eval_steps_per_second": 0.175, + "step": 3745 + }, + { + "epoch": 0.9649354412853784, + "grad_norm": 0.1452604989808499, + "learning_rate": 3.73575899398021e-07, + "loss": 0.3457, + "step": 3746 + }, + { + "epoch": 0.9651930321666613, + "grad_norm": 0.19165309239817846, + "learning_rate": 3.681090600445747e-07, + "loss": 0.4019, + "step": 3747 + }, + { + "epoch": 0.9654506230479442, + "grad_norm": 0.1471576365965977, + "learning_rate": 3.626823686403935e-07, + "loss": 0.3234, + "step": 3748 + }, + { + "epoch": 0.9657082139292269, + "grad_norm": 0.18331570304808137, + "learning_rate": 3.572958295752049e-07, + "loss": 0.4197, + "step": 3749 + }, + { + "epoch": 0.9659658048105098, + "grad_norm": 0.1497592311083479, + "learning_rate": 3.519494472062568e-07, + "loss": 0.2976, + "step": 3750 + }, + { + "epoch": 0.9659658048105098, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8624934520691461, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3533824682235718, + "eval_runtime": 17.4882, + "eval_samples_per_second": 2.516, + "eval_steps_per_second": 0.172, + "step": 3750 + }, + { + "epoch": 0.9662233956917925, + "grad_norm": 0.18542590579616175, + "learning_rate": 3.466432258583174e-07, + "loss": 0.2955, + "step": 3751 + }, + { + "epoch": 0.9664809865730754, + "grad_norm": 0.14196607964512953, + "learning_rate": 3.4137716982366453e-07, + "loss": 0.2848, + "step": 3752 + }, + { + "epoch": 0.9667385774543581, + "grad_norm": 0.16210220602760117, + "learning_rate": 3.3615128336209054e-07, + "loss": 0.4347, + "step": 3753 + }, + { + "epoch": 0.966996168335641, + "grad_norm": 0.12357408535083989, + "learning_rate": 3.309655707008863e-07, + "loss": 0.3585, + "step": 3754 + }, + { + "epoch": 0.9672537592169237, + "grad_norm": 0.13618250690592645, + "learning_rate": 3.258200360348462e-07, + "loss": 0.3281, + "step": 3755 + }, + { + "epoch": 0.9672537592169237, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8588266107909899, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35360440611839294, + "eval_runtime": 17.0303, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.176, + "step": 3755 + }, + { + "epoch": 0.9675113500982065, + "grad_norm": 0.13654383235663448, + "learning_rate": 3.207146835262742e-07, + "loss": 0.3063, + "step": 3756 + }, + { + "epoch": 0.9677689409794893, + "grad_norm": 0.15999728729569007, + "learning_rate": 3.1564951730495006e-07, + "loss": 0.3554, + "step": 3757 + }, + { + "epoch": 0.9680265318607721, + "grad_norm": 0.16211117805780945, + "learning_rate": 3.106245414681741e-07, + "loss": 0.3693, + "step": 3758 + }, + { + "epoch": 0.9682841227420549, + "grad_norm": 0.21034146330880885, + "learning_rate": 3.0563976008071706e-07, + "loss": 0.4134, + "step": 3759 + }, + { + "epoch": 0.9685417136233377, + "grad_norm": 0.14652568706774757, + "learning_rate": 3.006951771748423e-07, + "loss": 0.3347, + "step": 3760 + }, + { + "epoch": 0.9685417136233377, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.857778941854374, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3546697497367859, + "eval_runtime": 16.9428, + "eval_samples_per_second": 2.597, + "eval_steps_per_second": 0.177, + "step": 3760 + }, + { + "epoch": 0.9687993045046205, + "grad_norm": 0.14316930791436056, + "learning_rate": 2.9579079675028374e-07, + "loss": 0.3233, + "step": 3761 + }, + { + "epoch": 0.9690568953859033, + "grad_norm": 0.2000551747105647, + "learning_rate": 2.9092662277427906e-07, + "loss": 0.4459, + "step": 3762 + }, + { + "epoch": 0.9693144862671862, + "grad_norm": 0.165468342566276, + "learning_rate": 2.8610265918151414e-07, + "loss": 0.3376, + "step": 3763 + }, + { + "epoch": 0.9695720771484689, + "grad_norm": 0.11885486319727619, + "learning_rate": 2.813189098741731e-07, + "loss": 0.2936, + "step": 3764 + }, + { + "epoch": 0.9698296680297518, + "grad_norm": 0.18333947791187652, + "learning_rate": 2.7657537872189386e-07, + "loss": 0.4077, + "step": 3765 + }, + { + "epoch": 0.9698296680297518, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8601361969617601, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35245028138160706, + "eval_runtime": 17.0879, + "eval_samples_per_second": 2.575, + "eval_steps_per_second": 0.176, + "step": 3765 + }, + { + "epoch": 0.9700872589110345, + "grad_norm": 0.18535954788255296, + "learning_rate": 2.7187206956179047e-07, + "loss": 0.404, + "step": 3766 + }, + { + "epoch": 0.9703448497923174, + "grad_norm": 0.17033575330351897, + "learning_rate": 2.6720898619843615e-07, + "loss": 0.3333, + "step": 3767 + }, + { + "epoch": 0.9706024406736001, + "grad_norm": 0.1340335465058161, + "learning_rate": 2.625861324038692e-07, + "loss": 0.3344, + "step": 3768 + }, + { + "epoch": 0.970860031554883, + "grad_norm": 0.1539518992267126, + "learning_rate": 2.580035119175761e-07, + "loss": 0.3689, + "step": 3769 + }, + { + "epoch": 0.9711176224361657, + "grad_norm": 0.22640026655678336, + "learning_rate": 2.534611284465083e-07, + "loss": 0.404, + "step": 3770 + }, + { + "epoch": 0.9711176224361657, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8601361969617599, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35391512513160706, + "eval_runtime": 17.1355, + "eval_samples_per_second": 2.568, + "eval_steps_per_second": 0.175, + "step": 3770 + }, + { + "epoch": 0.9713752133174486, + "grad_norm": 0.1467583746360964, + "learning_rate": 2.4895898566505983e-07, + "loss": 0.3658, + "step": 3771 + }, + { + "epoch": 0.9716328041987313, + "grad_norm": 0.15289702694249147, + "learning_rate": 2.4449708721508424e-07, + "loss": 0.3454, + "step": 3772 + }, + { + "epoch": 0.9718903950800142, + "grad_norm": 0.16047779634622722, + "learning_rate": 2.4007543670587774e-07, + "loss": 0.4495, + "step": 3773 + }, + { + "epoch": 0.9721479859612969, + "grad_norm": 0.1665765188938278, + "learning_rate": 2.356940377141681e-07, + "loss": 0.4329, + "step": 3774 + }, + { + "epoch": 0.9724055768425798, + "grad_norm": 0.1179113612466018, + "learning_rate": 2.3135289378413694e-07, + "loss": 0.3069, + "step": 3775 + }, + { + "epoch": 0.9724055768425798, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8569931901519119, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3548029065132141, + "eval_runtime": 17.5981, + "eval_samples_per_second": 2.5, + "eval_steps_per_second": 0.17, + "step": 3775 + }, + { + "epoch": 0.9726631677238625, + "grad_norm": 0.16314819661869495, + "learning_rate": 2.2705200842740305e-07, + "loss": 0.2938, + "step": 3776 + }, + { + "epoch": 0.9729207586051454, + "grad_norm": 0.20572881699364856, + "learning_rate": 2.2279138512300567e-07, + "loss": 0.3926, + "step": 3777 + }, + { + "epoch": 0.9731783494864282, + "grad_norm": 0.16351923007215768, + "learning_rate": 2.1857102731742684e-07, + "loss": 0.309, + "step": 3778 + }, + { + "epoch": 0.973435940367711, + "grad_norm": 0.15430164950532926, + "learning_rate": 2.143909384245746e-07, + "loss": 0.34, + "step": 3779 + }, + { + "epoch": 0.9736935312489938, + "grad_norm": 0.17881733482234016, + "learning_rate": 2.10251121825783e-07, + "loss": 0.3813, + "step": 3780 + }, + { + "epoch": 0.9736935312489938, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.857778941854374, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3528497815132141, + "eval_runtime": 16.9415, + "eval_samples_per_second": 2.597, + "eval_steps_per_second": 0.177, + "step": 3780 + }, + { + "epoch": 0.9739511221302766, + "grad_norm": 0.1759713240969221, + "learning_rate": 2.0615158086981222e-07, + "loss": 0.4305, + "step": 3781 + }, + { + "epoch": 0.9742087130115594, + "grad_norm": 0.15528693139533326, + "learning_rate": 2.020923188728374e-07, + "loss": 0.3215, + "step": 3782 + }, + { + "epoch": 0.9744663038928422, + "grad_norm": 0.1394952976316927, + "learning_rate": 1.980733391184486e-07, + "loss": 0.335, + "step": 3783 + }, + { + "epoch": 0.974723894774125, + "grad_norm": 0.16330749731661878, + "learning_rate": 1.940946448576675e-07, + "loss": 0.3388, + "step": 3784 + }, + { + "epoch": 0.9749814856554078, + "grad_norm": 0.13626155155802247, + "learning_rate": 1.9015623930890846e-07, + "loss": 0.3177, + "step": 3785 + }, + { + "epoch": 0.9749814856554078, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8617077003666841, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35196200013160706, + "eval_runtime": 16.6163, + "eval_samples_per_second": 2.648, + "eval_steps_per_second": 0.181, + "step": 3785 + }, + { + "epoch": 0.9752390765366906, + "grad_norm": 0.16720574020751214, + "learning_rate": 1.8625812565800648e-07, + "loss": 0.4074, + "step": 3786 + }, + { + "epoch": 0.9754966674179734, + "grad_norm": 0.16792236698504823, + "learning_rate": 1.8240030705820032e-07, + "loss": 0.3667, + "step": 3787 + }, + { + "epoch": 0.9757542582992562, + "grad_norm": 0.1932284534720809, + "learning_rate": 1.7858278663013817e-07, + "loss": 0.3507, + "step": 3788 + }, + { + "epoch": 0.976011849180539, + "grad_norm": 0.15949009758829952, + "learning_rate": 1.748055674618665e-07, + "loss": 0.2286, + "step": 3789 + }, + { + "epoch": 0.9762694400618218, + "grad_norm": 0.17407811866438883, + "learning_rate": 1.710686526088301e-07, + "loss": 0.3552, + "step": 3790 + }, + { + "epoch": 0.9762694400618218, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8609219486642221, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35324928164482117, + "eval_runtime": 17.4588, + "eval_samples_per_second": 2.52, + "eval_steps_per_second": 0.172, + "step": 3790 + }, + { + "epoch": 0.9765270309431047, + "grad_norm": 0.18141142441348804, + "learning_rate": 1.6737204509387206e-07, + "loss": 0.4585, + "step": 3791 + }, + { + "epoch": 0.9767846218243874, + "grad_norm": 0.17238267023227896, + "learning_rate": 1.6371574790723377e-07, + "loss": 0.3291, + "step": 3792 + }, + { + "epoch": 0.9770422127056703, + "grad_norm": 0.18083413404662643, + "learning_rate": 1.6009976400654937e-07, + "loss": 0.3972, + "step": 3793 + }, + { + "epoch": 0.977299803586953, + "grad_norm": 0.1659465500339923, + "learning_rate": 1.5652409631682908e-07, + "loss": 0.3672, + "step": 3794 + }, + { + "epoch": 0.9775573944682359, + "grad_norm": 0.13994735128114089, + "learning_rate": 1.529887477304981e-07, + "loss": 0.4038, + "step": 3795 + }, + { + "epoch": 0.9775573944682359, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8795180722891566, + "eval_PRM F1 AUC": 0.7223677317967523, + "eval_PRM F1 AUC (fixed)": 0.8572551073860659, + "eval_PRM F1 Neg": 0.5652173913043478, + "eval_PRM NPV": 0.5652173913043478, + "eval_PRM Precision": 0.8795180722891566, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.5652173913043478, + "eval_loss": 0.3546253442764282, + "eval_runtime": 16.6268, + "eval_samples_per_second": 2.646, + "eval_steps_per_second": 0.18, + "step": 3795 + }, + { + "epoch": 0.9778149853495186, + "grad_norm": 0.14847747360892083, + "learning_rate": 1.494937211073355e-07, + "loss": 0.3311, + "step": 3796 + }, + { + "epoch": 0.9780725762308015, + "grad_norm": 0.14264566608027543, + "learning_rate": 1.4603901927452978e-07, + "loss": 0.3381, + "step": 3797 + }, + { + "epoch": 0.9783301671120842, + "grad_norm": 0.13899999288446177, + "learning_rate": 1.4262464502663443e-07, + "loss": 0.3626, + "step": 3798 + }, + { + "epoch": 0.9785877579933671, + "grad_norm": 0.13291754858024754, + "learning_rate": 1.3925060112557898e-07, + "loss": 0.3594, + "step": 3799 + }, + { + "epoch": 0.9788453488746498, + "grad_norm": 0.15571500003310815, + "learning_rate": 1.3591689030068577e-07, + "loss": 0.3405, + "step": 3800 + }, + { + "epoch": 0.9788453488746498, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8795180722891566, + "eval_PRM F1 AUC": 0.7223677317967523, + "eval_PRM F1 AUC (fixed)": 0.8590885280251439, + "eval_PRM F1 Neg": 0.5652173913043478, + "eval_PRM NPV": 0.5652173913043478, + "eval_PRM Precision": 0.8795180722891566, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.5652173913043478, + "eval_loss": 0.3543146252632141, + "eval_runtime": 17.097, + "eval_samples_per_second": 2.574, + "eval_steps_per_second": 0.175, + "step": 3800 + }, + { + "epoch": 0.9791029397559327, + "grad_norm": 0.18620757337239352, + "learning_rate": 1.3262351524864213e-07, + "loss": 0.4272, + "step": 3801 + }, + { + "epoch": 0.9793605306372154, + "grad_norm": 0.1756420655693183, + "learning_rate": 1.2937047863350037e-07, + "loss": 0.2808, + "step": 3802 + }, + { + "epoch": 0.9796181215184983, + "grad_norm": 0.14244491271889062, + "learning_rate": 1.2615778308668892e-07, + "loss": 0.2852, + "step": 3803 + }, + { + "epoch": 0.979875712399781, + "grad_norm": 0.1516494617807825, + "learning_rate": 1.2298543120700113e-07, + "loss": 0.3433, + "step": 3804 + }, + { + "epoch": 0.9801333032810639, + "grad_norm": 0.14925646654192018, + "learning_rate": 1.1985342556060652e-07, + "loss": 0.2899, + "step": 3805 + }, + { + "epoch": 0.9801333032810639, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8609219486642221, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3538263440132141, + "eval_runtime": 17.8024, + "eval_samples_per_second": 2.472, + "eval_steps_per_second": 0.169, + "step": 3805 + }, + { + "epoch": 0.9803908941623467, + "grad_norm": 0.14432809565504431, + "learning_rate": 1.1676176868102295e-07, + "loss": 0.3247, + "step": 3806 + }, + { + "epoch": 0.9806484850436294, + "grad_norm": 0.17548929482352033, + "learning_rate": 1.1371046306914435e-07, + "loss": 0.3986, + "step": 3807 + }, + { + "epoch": 0.9809060759249123, + "grad_norm": 0.13571487128816537, + "learning_rate": 1.1069951119320188e-07, + "loss": 0.3344, + "step": 3808 + }, + { + "epoch": 0.981163666806195, + "grad_norm": 0.14559430886412886, + "learning_rate": 1.0772891548880282e-07, + "loss": 0.3374, + "step": 3809 + }, + { + "epoch": 0.9814212576874779, + "grad_norm": 0.15790207391145736, + "learning_rate": 1.0479867835891389e-07, + "loss": 0.3819, + "step": 3810 + }, + { + "epoch": 0.9814212576874779, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.85620743844945, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3541814684867859, + "eval_runtime": 17.5142, + "eval_samples_per_second": 2.512, + "eval_steps_per_second": 0.171, + "step": 3810 + }, + { + "epoch": 0.9816788485687606, + "grad_norm": 0.14105410419460254, + "learning_rate": 1.0190880217383347e-07, + "loss": 0.3418, + "step": 3811 + }, + { + "epoch": 0.9819364394500435, + "grad_norm": 0.16032004791537616, + "learning_rate": 9.905928927123609e-08, + "loss": 0.2992, + "step": 3812 + }, + { + "epoch": 0.9821940303313262, + "grad_norm": 0.17342208377038257, + "learning_rate": 9.625014195612236e-08, + "loss": 0.3601, + "step": 3813 + }, + { + "epoch": 0.9824516212126091, + "grad_norm": 0.13949111908780418, + "learning_rate": 9.348136250085793e-08, + "loss": 0.3036, + "step": 3814 + }, + { + "epoch": 0.9827092120938918, + "grad_norm": 0.1604200708844017, + "learning_rate": 9.075295314514565e-08, + "loss": 0.3832, + "step": 3815 + }, + { + "epoch": 0.9827092120938918, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859350445259298, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3556019067764282, + "eval_runtime": 17.1716, + "eval_samples_per_second": 2.562, + "eval_steps_per_second": 0.175, + "step": 3815 + }, + { + "epoch": 0.9829668029751747, + "grad_norm": 0.15596615102074293, + "learning_rate": 8.806491609603673e-08, + "loss": 0.3538, + "step": 3816 + }, + { + "epoch": 0.9832243938564574, + "grad_norm": 0.1798691547838207, + "learning_rate": 8.541725352791963e-08, + "loss": 0.3654, + "step": 3817 + }, + { + "epoch": 0.9834819847377403, + "grad_norm": 0.16031802102742165, + "learning_rate": 8.280996758253112e-08, + "loss": 0.363, + "step": 3818 + }, + { + "epoch": 0.983739575619023, + "grad_norm": 0.13947517080899272, + "learning_rate": 8.02430603689397e-08, + "loss": 0.315, + "step": 3819 + }, + { + "epoch": 0.9839971665003059, + "grad_norm": 0.16446868867314984, + "learning_rate": 7.77165339635566e-08, + "loss": 0.394, + "step": 3820 + }, + { + "epoch": 0.9839971665003059, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8619696176008381, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35427024960517883, + "eval_runtime": 17.0867, + "eval_samples_per_second": 2.575, + "eval_steps_per_second": 0.176, + "step": 3820 + }, + { + "epoch": 0.9842547573815887, + "grad_norm": 0.1385861107711507, + "learning_rate": 7.523039041012481e-08, + "loss": 0.3465, + "step": 3821 + }, + { + "epoch": 0.9845123482628715, + "grad_norm": 0.12561988471237037, + "learning_rate": 7.27846317197134e-08, + "loss": 0.3569, + "step": 3822 + }, + { + "epoch": 0.9847699391441543, + "grad_norm": 0.19075340599019874, + "learning_rate": 7.037925987074534e-08, + "loss": 0.324, + "step": 3823 + }, + { + "epoch": 0.9850275300254371, + "grad_norm": 0.1658793046587931, + "learning_rate": 6.801427680894756e-08, + "loss": 0.3466, + "step": 3824 + }, + { + "epoch": 0.9852851209067199, + "grad_norm": 0.1456440297940814, + "learning_rate": 6.568968444739532e-08, + "loss": 0.4245, + "step": 3825 + }, + { + "epoch": 0.9852851209067199, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8862275449101796, + "eval_PRM F1 AUC": 0.7283918281822945, + "eval_PRM F1 AUC (fixed)": 0.8588266107909899, + "eval_PRM F1 Neg": 0.5777777777777777, + "eval_PRM NPV": 0.5909090909090909, + "eval_PRM Precision": 0.8809523809523809, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.5652173913043478, + "eval_loss": 0.35573509335517883, + "eval_runtime": 17.1337, + "eval_samples_per_second": 2.568, + "eval_steps_per_second": 0.175, + "step": 3825 + }, + { + "epoch": 0.9855427117880027, + "grad_norm": 0.13283828143284623, + "learning_rate": 6.340548466648443e-08, + "loss": 0.2988, + "step": 3826 + }, + { + "epoch": 0.9858003026692855, + "grad_norm": 0.1839927084008868, + "learning_rate": 6.116167931393691e-08, + "loss": 0.3724, + "step": 3827 + }, + { + "epoch": 0.9860578935505683, + "grad_norm": 0.14082341113721372, + "learning_rate": 5.895827020479527e-08, + "loss": 0.276, + "step": 3828 + }, + { + "epoch": 0.9863154844318511, + "grad_norm": 0.14357908740989736, + "learning_rate": 5.6795259121439345e-08, + "loss": 0.2945, + "step": 3829 + }, + { + "epoch": 0.9865730753131339, + "grad_norm": 0.15451102193521904, + "learning_rate": 5.4672647813547305e-08, + "loss": 0.3223, + "step": 3830 + }, + { + "epoch": 0.9865730753131339, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8551597695128339, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3560901880264282, + "eval_runtime": 16.8872, + "eval_samples_per_second": 2.606, + "eval_steps_per_second": 0.178, + "step": 3830 + }, + { + "epoch": 0.9868306661944167, + "grad_norm": 0.17445890619553567, + "learning_rate": 5.2590437998134566e-08, + "loss": 0.4237, + "step": 3831 + }, + { + "epoch": 0.9870882570756995, + "grad_norm": 0.1255277707156797, + "learning_rate": 5.054863135953158e-08, + "loss": 0.3201, + "step": 3832 + }, + { + "epoch": 0.9873458479569823, + "grad_norm": 0.1420320075937339, + "learning_rate": 4.8547229549383844e-08, + "loss": 0.4258, + "step": 3833 + }, + { + "epoch": 0.9876034388382652, + "grad_norm": 0.16849844384390825, + "learning_rate": 4.658623418665742e-08, + "loss": 0.3963, + "step": 3834 + }, + { + "epoch": 0.9878610297195479, + "grad_norm": 0.13801399523631844, + "learning_rate": 4.4665646857627864e-08, + "loss": 0.3911, + "step": 3835 + }, + { + "epoch": 0.9878610297195479, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8619696176008381, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35400390625, + "eval_runtime": 17.5443, + "eval_samples_per_second": 2.508, + "eval_steps_per_second": 0.171, + "step": 3835 + }, + { + "epoch": 0.9881186206008308, + "grad_norm": 0.14497941822115065, + "learning_rate": 4.278546911588022e-08, + "loss": 0.337, + "step": 3836 + }, + { + "epoch": 0.9883762114821135, + "grad_norm": 0.17940620321670397, + "learning_rate": 4.094570248232565e-08, + "loss": 0.3965, + "step": 3837 + }, + { + "epoch": 0.9886338023633964, + "grad_norm": 0.13987601268318273, + "learning_rate": 3.9146348445173733e-08, + "loss": 0.3451, + "step": 3838 + }, + { + "epoch": 0.9888913932446791, + "grad_norm": 0.16282875349357964, + "learning_rate": 3.7387408459949035e-08, + "loss": 0.3761, + "step": 3839 + }, + { + "epoch": 0.989148984125962, + "grad_norm": 0.15513189177962927, + "learning_rate": 3.566888394948009e-08, + "loss": 0.3252, + "step": 3840 + }, + { + "epoch": 0.989148984125962, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8624934520691462, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3528497815132141, + "eval_runtime": 16.8796, + "eval_samples_per_second": 2.607, + "eval_steps_per_second": 0.178, + "step": 3840 + }, + { + "epoch": 0.9894065750072447, + "grad_norm": 0.1551213359199816, + "learning_rate": 3.3990776303910456e-08, + "loss": 0.3912, + "step": 3841 + }, + { + "epoch": 0.9896641658885276, + "grad_norm": 0.15981647033788135, + "learning_rate": 3.235308688068206e-08, + "loss": 0.3977, + "step": 3842 + }, + { + "epoch": 0.9899217567698103, + "grad_norm": 0.16522435580180106, + "learning_rate": 3.075581700454633e-08, + "loss": 0.2958, + "step": 3843 + }, + { + "epoch": 0.9901793476510932, + "grad_norm": 0.23521488165578452, + "learning_rate": 2.919896796755861e-08, + "loss": 0.4202, + "step": 3844 + }, + { + "epoch": 0.9904369385323759, + "grad_norm": 0.1904301992468953, + "learning_rate": 2.768254102907819e-08, + "loss": 0.4, + "step": 3845 + }, + { + "epoch": 0.9904369385323759, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.858302776322682, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35178443789482117, + "eval_runtime": 17.4583, + "eval_samples_per_second": 2.52, + "eval_steps_per_second": 0.172, + "step": 3845 + }, + { + "epoch": 0.9906945294136588, + "grad_norm": 0.1464762142939478, + "learning_rate": 2.6206537415762733e-08, + "loss": 0.3524, + "step": 3846 + }, + { + "epoch": 0.9909521202949415, + "grad_norm": 0.16020884252536802, + "learning_rate": 2.4770958321568283e-08, + "loss": 0.4248, + "step": 3847 + }, + { + "epoch": 0.9912097111762244, + "grad_norm": 0.11633653027179804, + "learning_rate": 2.337580490776592e-08, + "loss": 0.246, + "step": 3848 + }, + { + "epoch": 0.9914673020575072, + "grad_norm": 0.14268793767622032, + "learning_rate": 2.2021078302902897e-08, + "loss": 0.339, + "step": 3849 + }, + { + "epoch": 0.99172489293879, + "grad_norm": 0.1373486811631178, + "learning_rate": 2.070677960284706e-08, + "loss": 0.3918, + "step": 3850 + }, + { + "epoch": 0.99172489293879, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.859874279727606, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35293856263160706, + "eval_runtime": 17.4584, + "eval_samples_per_second": 2.52, + "eval_steps_per_second": 0.172, + "step": 3850 + }, + { + "epoch": 0.9919824838200728, + "grad_norm": 0.12419707771509048, + "learning_rate": 1.9432909870747974e-08, + "loss": 0.3071, + "step": 3851 + }, + { + "epoch": 0.9922400747013556, + "grad_norm": 0.14396651708514002, + "learning_rate": 1.819947013705914e-08, + "loss": 0.3771, + "step": 3852 + }, + { + "epoch": 0.9924976655826384, + "grad_norm": 0.16279186864950249, + "learning_rate": 1.7006461399532436e-08, + "loss": 0.3324, + "step": 3853 + }, + { + "epoch": 0.9927552564639212, + "grad_norm": 0.14552583560564572, + "learning_rate": 1.5853884623195925e-08, + "loss": 0.306, + "step": 3854 + }, + { + "epoch": 0.993012847345204, + "grad_norm": 0.14857715616726758, + "learning_rate": 1.4741740740398246e-08, + "loss": 0.3462, + "step": 3855 + }, + { + "epoch": 0.993012847345204, + "eval_PRM Accuracy": 0.8113207547169812, + "eval_PRM F1": 0.8780487804878049, + "eval_PRM F1 AUC": 0.7380827658459927, + "eval_PRM F1 AUC (fixed)": 0.8611838658983761, + "eval_PRM F1 Neg": 0.5833333333333334, + "eval_PRM NPV": 0.56, + "eval_PRM Precision": 0.8888888888888888, + "eval_PRM Recall": 0.8674698795180723, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35262784361839294, + "eval_runtime": 16.7981, + "eval_samples_per_second": 2.619, + "eval_steps_per_second": 0.179, + "step": 3855 + }, + { + "epoch": 0.9932704382264868, + "grad_norm": 0.11861395561824115, + "learning_rate": 1.367003065076422e-08, + "loss": 0.2956, + "step": 3856 + }, + { + "epoch": 0.9935280291077696, + "grad_norm": 0.18274494107078726, + "learning_rate": 1.2638755221217047e-08, + "loss": 0.3989, + "step": 3857 + }, + { + "epoch": 0.9937856199890523, + "grad_norm": 0.15911968143770888, + "learning_rate": 1.1647915285967204e-08, + "loss": 0.4393, + "step": 3858 + }, + { + "epoch": 0.9940432108703352, + "grad_norm": 0.19947833891807695, + "learning_rate": 1.069751164652355e-08, + "loss": 0.4774, + "step": 3859 + }, + { + "epoch": 0.994300801751618, + "grad_norm": 0.1792602435414376, + "learning_rate": 9.787545071676674e-09, + "loss": 0.4409, + "step": 3860 + }, + { + "epoch": 0.994300801751618, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.8596123624934521, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.3546697497367859, + "eval_runtime": 16.8421, + "eval_samples_per_second": 2.612, + "eval_steps_per_second": 0.178, + "step": 3860 + }, + { + "epoch": 0.9945583926329008, + "grad_norm": 0.16676315030772298, + "learning_rate": 8.918016297515541e-09, + "loss": 0.3873, + "step": 3861 + }, + { + "epoch": 0.9948159835141835, + "grad_norm": 0.1537904133890653, + "learning_rate": 8.088926027410848e-09, + "loss": 0.3587, + "step": 3862 + }, + { + "epoch": 0.9950735743954664, + "grad_norm": 0.14157889090639966, + "learning_rate": 7.30027493203167e-09, + "loss": 0.3607, + "step": 3863 + }, + { + "epoch": 0.9953311652767493, + "grad_norm": 0.19332578555591137, + "learning_rate": 6.552063649323259e-09, + "loss": 0.4088, + "step": 3864 + }, + { + "epoch": 0.995588756158032, + "grad_norm": 0.13129590984808737, + "learning_rate": 5.844292784523697e-09, + "loss": 0.3371, + "step": 3865 + }, + { + "epoch": 0.995588756158032, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.85751702462022, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35520240664482117, + "eval_runtime": 17.1042, + "eval_samples_per_second": 2.572, + "eval_steps_per_second": 0.175, + "step": 3865 + }, + { + "epoch": 0.9958463470393148, + "grad_norm": 0.1591974055243455, + "learning_rate": 5.176962910163896e-09, + "loss": 0.302, + "step": 3866 + }, + { + "epoch": 0.9961039379205976, + "grad_norm": 0.13232354053669065, + "learning_rate": 4.5500745660509435e-09, + "loss": 0.2751, + "step": 3867 + }, + { + "epoch": 0.9963615288018804, + "grad_norm": 0.19267965536010473, + "learning_rate": 3.963628259290308e-09, + "loss": 0.4389, + "step": 3868 + }, + { + "epoch": 0.9966191196831632, + "grad_norm": 0.22194226750555665, + "learning_rate": 3.4176244642636355e-09, + "loss": 0.4047, + "step": 3869 + }, + { + "epoch": 0.996876710564446, + "grad_norm": 0.1667614023688519, + "learning_rate": 2.912063622639849e-09, + "loss": 0.3986, + "step": 3870 + }, + { + "epoch": 0.996876710564446, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.8643268727082242, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35129615664482117, + "eval_runtime": 16.8712, + "eval_samples_per_second": 2.608, + "eval_steps_per_second": 0.178, + "step": 3870 + }, + { + "epoch": 0.9971343014457288, + "grad_norm": 0.1728700765894953, + "learning_rate": 2.4469461433751507e-09, + "loss": 0.3819, + "step": 3871 + }, + { + "epoch": 0.9973918923270116, + "grad_norm": 0.21291593566328246, + "learning_rate": 2.0222724027074703e-09, + "loss": 0.3915, + "step": 3872 + }, + { + "epoch": 0.9976494832082944, + "grad_norm": 0.1550645695952983, + "learning_rate": 1.6380427441731184e-09, + "loss": 0.3743, + "step": 3873 + }, + { + "epoch": 0.9979070740895772, + "grad_norm": 0.166816058084006, + "learning_rate": 1.2942574785623772e-09, + "loss": 0.3438, + "step": 3874 + }, + { + "epoch": 0.99816466497086, + "grad_norm": 0.18251919599287586, + "learning_rate": 9.90916883986115e-10, + "loss": 0.2944, + "step": 3875 + }, + { + "epoch": 0.99816466497086, + "eval_PRM Accuracy": 0.8207547169811321, + "eval_PRM F1": 0.8848484848484849, + "eval_PRM F1 AUC": 0.7441068622315348, + "eval_PRM F1 AUC (fixed)": 0.863017286537454, + "eval_PRM F1 Neg": 0.5957446808510638, + "eval_PRM NPV": 0.5833333333333334, + "eval_PRM Precision": 0.8902439024390244, + "eval_PRM Recall": 0.8795180722891566, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35129615664482117, + "eval_runtime": 16.9038, + "eval_samples_per_second": 2.603, + "eval_steps_per_second": 0.177, + "step": 3875 + }, + { + "epoch": 0.9984222558521428, + "grad_norm": 0.15290702113417942, + "learning_rate": 7.280212058091706e-10, + "loss": 0.2959, + "step": 3876 + }, + { + "epoch": 0.9986798467334257, + "grad_norm": 0.14057128974127117, + "learning_rate": 5.055706566947649e-10, + "loss": 0.4001, + "step": 3877 + }, + { + "epoch": 0.9989374376147084, + "grad_norm": 0.14824828438814447, + "learning_rate": 3.2356541658784546e-10, + "loss": 0.3468, + "step": 3878 + }, + { + "epoch": 0.9991950284959913, + "grad_norm": 0.15676207191010466, + "learning_rate": 1.8200563271508765e-10, + "loss": 0.4174, + "step": 3879 + }, + { + "epoch": 0.999452619377274, + "grad_norm": 0.13897661504636863, + "learning_rate": 8.08914195793431e-11, + "loss": 0.342, + "step": 3880 + }, + { + "epoch": 0.999452619377274, + "eval_PRM Accuracy": 0.8301886792452831, + "eval_PRM F1": 0.891566265060241, + "eval_PRM F1 AUC": 0.7501309586170771, + "eval_PRM F1 AUC (fixed)": 0.859088528025144, + "eval_PRM F1 Neg": 0.6086956521739131, + "eval_PRM NPV": 0.6086956521739131, + "eval_PRM Precision": 0.891566265060241, + "eval_PRM Recall": 0.891566265060241, + "eval_PRM Specificty": 0.6086956521739131, + "eval_loss": 0.35400390625, + "eval_runtime": 17.0944, + "eval_samples_per_second": 2.574, + "eval_steps_per_second": 0.175, + "step": 3880 + }, + { + "epoch": 0.9997102102585569, + "grad_norm": 0.18222748366726835, + "learning_rate": 2.0222858987395398e-11, + "loss": 0.2709, + "step": 3881 + }, + { + "epoch": 0.9999678011398396, + "grad_norm": 0.175363916804034, + "learning_rate": 0.0, + "loss": 0.4232, + "step": 3882 } ], "logging_steps": 1, @@ -36438,12 +40404,12 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": false + "should_training_stop": true }, "attributes": {} } }, - "total_flos": 7639333596430336.0, + "total_flos": 8470762821648384.0, "train_batch_size": 4, "trial_name": null, "trial_params": null