{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.986666666666667, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 1808.0, "learning_rate": 7.8125e-06, "log_odds_chosen": -1.008344054222107, "log_odds_ratio": -10.17955493927002, "logps/chosen": -21.696313858032227, "logps/rejected": -20.68819808959961, "loss": 736.0327, "nll_loss": 9.675058364868164, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -4.339262962341309, "rewards/margins": -0.20162281394004822, "rewards/rejected": -4.137639999389648, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 1256.0, "learning_rate": 1.5625e-05, "log_odds_chosen": -2.8046412467956543, "log_odds_ratio": -10.168278694152832, "logps/chosen": -21.001956939697266, "logps/rejected": -18.19767951965332, "loss": 738.0496, "nll_loss": 8.701889038085938, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": -4.200392246246338, "rewards/margins": -0.5608552694320679, "rewards/rejected": -3.6395363807678223, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 5440.0, "learning_rate": 2.34375e-05, "log_odds_chosen": -1.2525489330291748, "log_odds_ratio": -10.522050857543945, "logps/chosen": -21.009998321533203, "logps/rejected": -19.756052017211914, "loss": 763.5602, "nll_loss": 8.136326789855957, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -4.201999664306641, "rewards/margins": -0.2507893443107605, "rewards/rejected": -3.9512104988098145, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 6496.0, "learning_rate": 3.125e-05, "log_odds_chosen": -4.4795145988464355, "log_odds_ratio": -9.770153999328613, "logps/chosen": -18.06368064880371, "logps/rejected": -13.585962295532227, "loss": 685.1017, "nll_loss": 7.039858818054199, "rewards/accuracies": 0.4375, "rewards/chosen": -3.612736463546753, "rewards/margins": -0.8955442309379578, "rewards/rejected": -2.7171921730041504, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 460.0, "learning_rate": 3.90625e-05, "log_odds_chosen": -0.08603362739086151, "log_odds_ratio": -2.429269313812256, "logps/chosen": -5.611455917358398, "logps/rejected": -5.512633323669434, "loss": 191.7091, "nll_loss": 3.994724988937378, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -1.1222912073135376, "rewards/margins": -0.019764503464102745, "rewards/rejected": -1.1025266647338867, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 290.0, "learning_rate": 4.6875e-05, "log_odds_chosen": 0.013890685513615608, "log_odds_ratio": -0.9068824052810669, "logps/chosen": -1.946455717086792, "logps/rejected": -1.9621555805206299, "loss": 62.3461, "nll_loss": 2.7580060958862305, "rewards/accuracies": 0.5, "rewards/chosen": -0.3892911374568939, "rewards/margins": 0.003139972686767578, "rewards/rejected": -0.3924311101436615, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 156.0, "learning_rate": 4.998613757348784e-05, "log_odds_chosen": 0.1850312501192093, "log_odds_ratio": -0.8029718399047852, "logps/chosen": -1.6453851461410522, "logps/rejected": -1.8108527660369873, "loss": 52.5707, "nll_loss": 2.74991512298584, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.32907700538635254, "rewards/margins": 0.03309354558587074, "rewards/rejected": -0.362170547246933, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 434.0, "learning_rate": 4.990147841143462e-05, "log_odds_chosen": 0.26716217398643494, "log_odds_ratio": -0.7125700116157532, "logps/chosen": -1.4205152988433838, "logps/rejected": -1.643204927444458, "loss": 45.4019, "nll_loss": 2.5210635662078857, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.28410303592681885, "rewards/margins": 0.04453796148300171, "rewards/rejected": -0.32864099740982056, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 149.0, "learning_rate": 4.97401218720448e-05, "log_odds_chosen": 0.1843370497226715, "log_odds_ratio": -0.7205697894096375, "logps/chosen": -1.357447862625122, "logps/rejected": -1.5186808109283447, "loss": 43.4227, "nll_loss": 2.3052825927734375, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.27148956060409546, "rewards/margins": 0.03224659711122513, "rewards/rejected": -0.30373615026474, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 89.5, "learning_rate": 4.9502564938797946e-05, "log_odds_chosen": 0.21526531875133514, "log_odds_ratio": -0.7007580995559692, "logps/chosen": -1.2479262351989746, "logps/rejected": -1.4284145832061768, "loss": 39.9249, "nll_loss": 2.381633996963501, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.24958527088165283, "rewards/margins": 0.036097653210163116, "rewards/rejected": -0.28568291664123535, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 127.0, "learning_rate": 4.918953929490768e-05, "log_odds_chosen": 0.19697749614715576, "log_odds_ratio": -0.7165523171424866, "logps/chosen": -1.2462198734283447, "logps/rejected": -1.4008221626281738, "loss": 39.8666, "nll_loss": 2.3082547187805176, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.24924394488334656, "rewards/margins": 0.03092046082019806, "rewards/rejected": -0.2801644206047058, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 258.0, "learning_rate": 4.88020090697132e-05, "log_odds_chosen": 0.19977203011512756, "log_odds_ratio": -0.6954725384712219, "logps/chosen": -1.2253552675247192, "logps/rejected": -1.3942426443099976, "loss": 39.1969, "nll_loss": 2.4659817218780518, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.24507102370262146, "rewards/margins": 0.03377751260995865, "rewards/rejected": -0.2788485586643219, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 98.0, "learning_rate": 4.834116786912897e-05, "log_odds_chosen": 0.24036189913749695, "log_odds_ratio": -0.67494797706604, "logps/chosen": -1.202803373336792, "logps/rejected": -1.386717438697815, "loss": 38.4748, "nll_loss": 2.561748504638672, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.24056068062782288, "rewards/margins": 0.03678280860185623, "rewards/rejected": -0.2773435115814209, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 156.0, "learning_rate": 4.7808435099299045e-05, "log_odds_chosen": 0.2758210301399231, "log_odds_ratio": -0.6614188551902771, "logps/chosen": -1.177128553390503, "logps/rejected": -1.3960373401641846, "loss": 37.6547, "nll_loss": 2.348580837249756, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.23542571067810059, "rewards/margins": 0.043781764805316925, "rewards/rejected": -0.2792074978351593, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 296.0, "learning_rate": 4.720545159477922e-05, "log_odds_chosen": 0.283970445394516, "log_odds_ratio": -0.6692668199539185, "logps/chosen": -1.140987515449524, "logps/rejected": -1.3577347993850708, "loss": 36.4491, "nll_loss": 2.3564374446868896, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.22819750010967255, "rewards/margins": 0.04334944486618042, "rewards/rejected": -0.2715469300746918, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 110.0, "learning_rate": 4.653407456471222e-05, "log_odds_chosen": 0.16520562767982483, "log_odds_ratio": -0.7094644904136658, "logps/chosen": -1.1379332542419434, "logps/rejected": -1.269300103187561, "loss": 36.4075, "nll_loss": 2.159475803375244, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.22758665680885315, "rewards/margins": 0.026273369789123535, "rewards/rejected": -0.2538600564002991, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 164.0, "learning_rate": 4.579637187256222e-05, "log_odds_chosen": 0.16680458188056946, "log_odds_ratio": -0.691378653049469, "logps/chosen": -1.1199719905853271, "logps/rejected": -1.2509021759033203, "loss": 35.8328, "nll_loss": 2.1975584030151367, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2239944040775299, "rewards/margins": 0.02618604339659214, "rewards/rejected": -0.2501804232597351, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 65.5, "learning_rate": 4.499461566702685e-05, "log_odds_chosen": 0.25345996022224426, "log_odds_ratio": -0.6629332900047302, "logps/chosen": -1.052328109741211, "logps/rejected": -1.2316312789916992, "loss": 33.6699, "nll_loss": 2.1638712882995605, "rewards/accuracies": 0.625, "rewards/chosen": -0.21046562492847443, "rewards/margins": 0.03586065024137497, "rewards/rejected": -0.2463262975215912, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 78.0, "learning_rate": 4.413127538374411e-05, "log_odds_chosen": 0.13198286294937134, "log_odds_ratio": -0.7187220454216003, "logps/chosen": -1.1173431873321533, "logps/rejected": -1.2194410562515259, "loss": 35.7508, "nll_loss": 2.091909170150757, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.223468616604805, "rewards/margins": 0.020419595763087273, "rewards/rejected": -0.24388821423053741, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 66.0, "learning_rate": 4.320901013934887e-05, "log_odds_chosen": 0.233420729637146, "log_odds_ratio": -0.6643728017807007, "logps/chosen": -1.0819002389907837, "logps/rejected": -1.2531683444976807, "loss": 34.6098, "nll_loss": 2.0589497089385986, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.2163800448179245, "rewards/margins": 0.03425363451242447, "rewards/rejected": -0.2506336569786072, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 152.0, "learning_rate": 4.223066054130568e-05, "log_odds_chosen": 0.25719505548477173, "log_odds_ratio": -0.6791940927505493, "logps/chosen": -1.0752637386322021, "logps/rejected": -1.2887135744094849, "loss": 34.3747, "nll_loss": 2.0260989665985107, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.2150527536869049, "rewards/margins": 0.04268994182348251, "rewards/rejected": -0.257742702960968, "step": 105 }, { "epoch": 1.0429629629629629, "grad_norm": 57.5, "learning_rate": 4.1199239938743797e-05, "log_odds_chosen": 0.5201781988143921, "log_odds_ratio": -0.564489483833313, "logps/chosen": -0.9281116724014282, "logps/rejected": -1.2665040493011475, "loss": 29.6815, "nll_loss": 1.8919875621795654, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.18562233448028564, "rewards/margins": 0.06767849624156952, "rewards/rejected": -0.25330081582069397, "step": 110 }, { "epoch": 1.0903703703703704, "grad_norm": 69.5, "learning_rate": 4.0117925141242174e-05, "log_odds_chosen": 0.7453327178955078, "log_odds_ratio": -0.5092401504516602, "logps/chosen": -0.8455835580825806, "logps/rejected": -1.3220586776733398, "loss": 26.9978, "nll_loss": 2.0071778297424316, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.16911670565605164, "rewards/margins": 0.09529503434896469, "rewards/rejected": -0.2644117474555969, "step": 115 }, { "epoch": 1.1377777777777778, "grad_norm": 114.5, "learning_rate": 3.899004663415084e-05, "log_odds_chosen": 0.6257216334342957, "log_odds_ratio": -0.525432288646698, "logps/chosen": -0.8546761274337769, "logps/rejected": -1.2405774593353271, "loss": 27.3343, "nll_loss": 2.060844898223877, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.1709352433681488, "rewards/margins": 0.07718025892972946, "rewards/rejected": -0.24811549484729767, "step": 120 }, { "epoch": 1.1851851851851851, "grad_norm": 202.0, "learning_rate": 3.781907832058587e-05, "log_odds_chosen": 0.6118601560592651, "log_odds_ratio": -0.5294589996337891, "logps/chosen": -0.907837986946106, "logps/rejected": -1.2936393022537231, "loss": 29.0283, "nll_loss": 1.9723001718521118, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.18156759440898895, "rewards/margins": 0.07716026157140732, "rewards/rejected": -0.25872787833213806, "step": 125 }, { "epoch": 1.2325925925925927, "grad_norm": 47.25, "learning_rate": 3.660862682169282e-05, "log_odds_chosen": 0.7508286237716675, "log_odds_ratio": -0.5130770206451416, "logps/chosen": -0.8467851877212524, "logps/rejected": -1.3315311670303345, "loss": 26.9969, "nll_loss": 1.963587760925293, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.169357031583786, "rewards/margins": 0.09694920480251312, "rewards/rejected": -0.26630622148513794, "step": 130 }, { "epoch": 1.28, "grad_norm": 219.0, "learning_rate": 3.5362420368134356e-05, "log_odds_chosen": 0.6324235796928406, "log_odds_ratio": -0.5143457055091858, "logps/chosen": -0.8541671633720398, "logps/rejected": -1.2207109928131104, "loss": 27.3238, "nll_loss": 1.9547522068023682, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.17083343863487244, "rewards/margins": 0.07330875098705292, "rewards/rejected": -0.24414214491844177, "step": 135 }, { "epoch": 1.3274074074074074, "grad_norm": 219.0, "learning_rate": 3.408429731701635e-05, "log_odds_chosen": 0.7118546366691589, "log_odds_ratio": -0.5201038122177124, "logps/chosen": -0.8679434061050415, "logps/rejected": -1.3246322870254517, "loss": 27.7072, "nll_loss": 1.9330047369003296, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.17358867824077606, "rewards/margins": 0.09133778512477875, "rewards/rejected": -0.2649264931678772, "step": 140 }, { "epoch": 1.374814814814815, "grad_norm": 66.0, "learning_rate": 3.2778194329621104e-05, "log_odds_chosen": 0.6516977548599243, "log_odds_ratio": -0.5285124778747559, "logps/chosen": -0.8772061467170715, "logps/rejected": -1.2873995304107666, "loss": 28.0346, "nll_loss": 2.029435873031616, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.17544123530387878, "rewards/margins": 0.08203869313001633, "rewards/rejected": -0.2574799358844757, "step": 145 }, { "epoch": 1.4222222222222223, "grad_norm": 162.0, "learning_rate": 3.144813424636031e-05, "log_odds_chosen": 0.7266349792480469, "log_odds_ratio": -0.5039714574813843, "logps/chosen": -0.8121232986450195, "logps/rejected": -1.273530125617981, "loss": 25.94, "nll_loss": 2.0966086387634277, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16242465376853943, "rewards/margins": 0.09228137135505676, "rewards/rejected": -0.2547060549259186, "step": 150 }, { "epoch": 1.4696296296296296, "grad_norm": 143.0, "learning_rate": 3.0098213696293542e-05, "log_odds_chosen": 0.650794506072998, "log_odds_ratio": -0.5278457403182983, "logps/chosen": -0.8365495800971985, "logps/rejected": -1.2551826238632202, "loss": 26.7323, "nll_loss": 2.240175485610962, "rewards/accuracies": 0.734375, "rewards/chosen": -0.16730991005897522, "rewards/margins": 0.08372663706541061, "rewards/rejected": -0.25103655457496643, "step": 155 }, { "epoch": 1.5170370370370372, "grad_norm": 77.5, "learning_rate": 2.8732590479375165e-05, "log_odds_chosen": 0.6227356195449829, "log_odds_ratio": -0.5516515970230103, "logps/chosen": -0.828398585319519, "logps/rejected": -1.2222946882247925, "loss": 26.4676, "nll_loss": 2.183659076690674, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.1656797230243683, "rewards/margins": 0.0787791982293129, "rewards/rejected": -0.24445891380310059, "step": 160 }, { "epoch": 1.5644444444444443, "grad_norm": 63.5, "learning_rate": 2.7355470760292956e-05, "log_odds_chosen": 0.7487412691116333, "log_odds_ratio": -0.49377554655075073, "logps/chosen": -0.7827351689338684, "logps/rejected": -1.2390353679656982, "loss": 25.0132, "nll_loss": 2.1968765258789062, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.15654703974723816, "rewards/margins": 0.09126004576683044, "rewards/rejected": -0.2478071004152298, "step": 165 }, { "epoch": 1.6118518518518519, "grad_norm": 81.0, "learning_rate": 2.597109611334169e-05, "log_odds_chosen": 0.6999706029891968, "log_odds_ratio": -0.514404833316803, "logps/chosen": -0.8419672846794128, "logps/rejected": -1.2977701425552368, "loss": 26.9121, "nll_loss": 2.095829963684082, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.16839346289634705, "rewards/margins": 0.09116056561470032, "rewards/rejected": -0.25955405831336975, "step": 170 }, { "epoch": 1.6592592592592592, "grad_norm": 60.5, "learning_rate": 2.458373045823404e-05, "log_odds_chosen": 0.7051068544387817, "log_odds_ratio": -0.5076509714126587, "logps/chosen": -0.8126438856124878, "logps/rejected": -1.2429084777832031, "loss": 25.972, "nll_loss": 2.211460828781128, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.16252879798412323, "rewards/margins": 0.08605290949344635, "rewards/rejected": -0.2485816925764084, "step": 175 }, { "epoch": 1.7066666666666666, "grad_norm": 68.0, "learning_rate": 2.3197646927086697e-05, "log_odds_chosen": 0.6526662707328796, "log_odds_ratio": -0.5229703783988953, "logps/chosen": -0.8144344091415405, "logps/rejected": -1.2128846645355225, "loss": 26.0409, "nll_loss": 2.0978920459747314, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.16288688778877258, "rewards/margins": 0.07969003915786743, "rewards/rejected": -0.24257692694664001, "step": 180 }, { "epoch": 1.7540740740740741, "grad_norm": 61.0, "learning_rate": 2.1817114703032176e-05, "log_odds_chosen": 0.5944602489471436, "log_odds_ratio": -0.5618599057197571, "logps/chosen": -0.8705334663391113, "logps/rejected": -1.2557179927825928, "loss": 27.8395, "nll_loss": 1.9792810678482056, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.1741066873073578, "rewards/margins": 0.07703690975904465, "rewards/rejected": -0.25114360451698303, "step": 185 }, { "epoch": 1.8014814814814815, "grad_norm": 97.0, "learning_rate": 2.0446385870993467e-05, "log_odds_chosen": 0.6862818002700806, "log_odds_ratio": -0.5245167016983032, "logps/chosen": -0.8145660161972046, "logps/rejected": -1.232753038406372, "loss": 26.0206, "nll_loss": 2.093113422393799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16291318833827972, "rewards/margins": 0.08363740146160126, "rewards/rejected": -0.24655060470104218, "step": 190 }, { "epoch": 1.8488888888888888, "grad_norm": 50.0, "learning_rate": 1.9089682321121834e-05, "log_odds_chosen": 0.7545720338821411, "log_odds_ratio": -0.48210686445236206, "logps/chosen": -0.8307647705078125, "logps/rejected": -1.3077232837677002, "loss": 26.5505, "nll_loss": 2.2815651893615723, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.1661529690027237, "rewards/margins": 0.09539170563220978, "rewards/rejected": -0.26154467463493347, "step": 195 }, { "epoch": 1.8962962962962964, "grad_norm": 65.0, "learning_rate": 1.775118274523545e-05, "log_odds_chosen": 0.6288995742797852, "log_odds_ratio": -0.5331851840019226, "logps/chosen": -0.8431414365768433, "logps/rejected": -1.233559489250183, "loss": 26.946, "nll_loss": 2.2366058826446533, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.16862830519676208, "rewards/margins": 0.0780835822224617, "rewards/rejected": -0.24671189486980438, "step": 200 }, { "epoch": 1.9437037037037037, "grad_norm": 56.0, "learning_rate": 1.643500976631037e-05, "log_odds_chosen": 0.6592320203781128, "log_odds_ratio": -0.5180245041847229, "logps/chosen": -0.8212572336196899, "logps/rejected": -1.2188732624053955, "loss": 26.2562, "nll_loss": 2.1104683876037598, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.16425147652626038, "rewards/margins": 0.07952319085597992, "rewards/rejected": -0.2437746524810791, "step": 205 }, { "epoch": 1.991111111111111, "grad_norm": 63.5, "learning_rate": 1.514521724066537e-05, "log_odds_chosen": 0.536708414554596, "log_odds_ratio": -0.5566378235816956, "logps/chosen": -0.8468238115310669, "logps/rejected": -1.1587189435958862, "loss": 27.0896, "nll_loss": 2.173583745956421, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16936475038528442, "rewards/margins": 0.06237905099987984, "rewards/rejected": -0.23174378275871277, "step": 210 }, { "epoch": 2.0385185185185186, "grad_norm": 36.75, "learning_rate": 1.3885777771950348e-05, "log_odds_chosen": 1.0355523824691772, "log_odds_ratio": -0.41216397285461426, "logps/chosen": -0.6247184872627258, "logps/rejected": -1.1848514080047607, "loss": 19.9299, "nll_loss": 2.0519230365753174, "rewards/accuracies": 0.8125, "rewards/chosen": -0.12494368851184845, "rewards/margins": 0.11202657222747803, "rewards/rejected": -0.23697027564048767, "step": 215 }, { "epoch": 2.0859259259259257, "grad_norm": 45.75, "learning_rate": 1.2660570475395683e-05, "log_odds_chosen": 1.188058614730835, "log_odds_ratio": -0.3877725303173065, "logps/chosen": -0.6814132332801819, "logps/rejected": -1.3385488986968994, "loss": 21.7134, "nll_loss": 2.186136245727539, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.13628263771533966, "rewards/margins": 0.13142715394496918, "rewards/rejected": -0.26770979166030884, "step": 220 }, { "epoch": 2.1333333333333333, "grad_norm": 69.0, "learning_rate": 1.1473369030008974e-05, "log_odds_chosen": 1.3558170795440674, "log_odds_ratio": -0.3460015654563904, "logps/chosen": -0.6204769015312195, "logps/rejected": -1.3810127973556519, "loss": 19.7129, "nll_loss": 2.1640377044677734, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.1240953654050827, "rewards/margins": 0.1521071493625641, "rewards/rejected": -0.27620255947113037, "step": 225 }, { "epoch": 2.180740740740741, "grad_norm": 60.5, "learning_rate": 1.0327830055518842e-05, "log_odds_chosen": 1.3206228017807007, "log_odds_ratio": -0.3496930003166199, "logps/chosen": -0.6283946633338928, "logps/rejected": -1.3481276035308838, "loss": 20.0634, "nll_loss": 2.163454532623291, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.1256789267063141, "rewards/margins": 0.1439466029405594, "rewards/rejected": -0.2696255147457123, "step": 230 }, { "epoch": 2.228148148148148, "grad_norm": 46.25, "learning_rate": 9.227481849865235e-06, "log_odds_chosen": 1.3826463222503662, "log_odds_ratio": -0.34246888756752014, "logps/chosen": -0.601762056350708, "logps/rejected": -1.3406898975372314, "loss": 19.1994, "nll_loss": 2.1239700317382812, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.12035240978002548, "rewards/margins": 0.14778557419776917, "rewards/rejected": -0.26813799142837524, "step": 235 }, { "epoch": 2.2755555555555556, "grad_norm": 52.25, "learning_rate": 8.175713521924978e-06, "log_odds_chosen": 1.3184112310409546, "log_odds_ratio": -0.36138203740119934, "logps/chosen": -0.626305878162384, "logps/rejected": -1.3914432525634766, "loss": 19.7327, "nll_loss": 2.0872886180877686, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.12526118755340576, "rewards/margins": 0.1530275046825409, "rewards/rejected": -0.27828869223594666, "step": 240 }, { "epoch": 2.322962962962963, "grad_norm": 47.5, "learning_rate": 7.1757645529443665e-06, "log_odds_chosen": 1.342882513999939, "log_odds_ratio": -0.33094173669815063, "logps/chosen": -0.5907199382781982, "logps/rejected": -1.3384162187576294, "loss": 18.8515, "nll_loss": 2.086435556411743, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.11814399063587189, "rewards/margins": 0.1495392769575119, "rewards/rejected": -0.2676832377910614, "step": 245 }, { "epoch": 2.3703703703703702, "grad_norm": 48.0, "learning_rate": 6.230714818829733e-06, "log_odds_chosen": 1.4440391063690186, "log_odds_ratio": -0.3371729254722595, "logps/chosen": -0.5727447867393494, "logps/rejected": -1.3450387716293335, "loss": 17.9915, "nll_loss": 2.1893529891967773, "rewards/accuracies": 0.875, "rewards/chosen": -0.11454895883798599, "rewards/margins": 0.15445882081985474, "rewards/rejected": -0.26900777220726013, "step": 250 }, { "epoch": 2.417777777777778, "grad_norm": 44.0, "learning_rate": 5.343475104027743e-06, "log_odds_chosen": 1.4869956970214844, "log_odds_ratio": -0.33424651622772217, "logps/chosen": -0.5934678912162781, "logps/rejected": -1.4604480266571045, "loss": 18.3318, "nll_loss": 2.072566270828247, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11869357526302338, "rewards/margins": 0.1733960211277008, "rewards/rejected": -0.2920895516872406, "step": 255 }, { "epoch": 2.4651851851851854, "grad_norm": 82.0, "learning_rate": 4.516778136213037e-06, "log_odds_chosen": 1.3816086053848267, "log_odds_ratio": -0.34434396028518677, "logps/chosen": -0.5892956256866455, "logps/rejected": -1.3579511642456055, "loss": 18.3788, "nll_loss": 2.0510034561157227, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.1178591251373291, "rewards/margins": 0.15373112261295319, "rewards/rejected": -0.2715902626514435, "step": 260 }, { "epoch": 2.5125925925925925, "grad_norm": 54.75, "learning_rate": 3.7531701693965554e-06, "log_odds_chosen": 1.349381685256958, "log_odds_ratio": -0.3549429774284363, "logps/chosen": -0.6175572276115417, "logps/rejected": -1.377739667892456, "loss": 19.2041, "nll_loss": 2.2108561992645264, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.12351144850254059, "rewards/margins": 0.15203648805618286, "rewards/rejected": -0.27554795145988464, "step": 265 }, { "epoch": 2.56, "grad_norm": 46.0, "learning_rate": 3.055003141378948e-06, "log_odds_chosen": 1.4625142812728882, "log_odds_ratio": -0.33779287338256836, "logps/chosen": -0.6002839803695679, "logps/rejected": -1.4510117769241333, "loss": 18.223, "nll_loss": 2.1512744426727295, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.12005682289600372, "rewards/margins": 0.17014555633068085, "rewards/rejected": -0.2902023494243622, "step": 270 }, { "epoch": 2.6074074074074076, "grad_norm": 41.75, "learning_rate": 2.424427429704365e-06, "log_odds_chosen": 1.3319700956344604, "log_odds_ratio": -0.35751980543136597, "logps/chosen": -0.6509729623794556, "logps/rejected": -1.444106101989746, "loss": 19.8273, "nll_loss": 1.982131004333496, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.13019458949565887, "rewards/margins": 0.15862663090229034, "rewards/rejected": -0.2888212203979492, "step": 275 }, { "epoch": 2.6548148148148147, "grad_norm": 50.75, "learning_rate": 1.8633852284264508e-06, "log_odds_chosen": 1.2478935718536377, "log_odds_ratio": -0.3773137629032135, "logps/chosen": -0.6254156827926636, "logps/rejected": -1.2992069721221924, "loss": 19.9581, "nll_loss": 2.197223663330078, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.12508316338062286, "rewards/margins": 0.13475826382637024, "rewards/rejected": -0.2598413825035095, "step": 280 }, { "epoch": 2.7022222222222223, "grad_norm": 47.5, "learning_rate": 1.3736045660864034e-06, "log_odds_chosen": 1.3165029287338257, "log_odds_ratio": -0.3543280363082886, "logps/chosen": -0.6258935332298279, "logps/rejected": -1.3820149898529053, "loss": 19.8195, "nll_loss": 2.149914264678955, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.12517870962619781, "rewards/margins": 0.1512243151664734, "rewards/rejected": -0.2764030396938324, "step": 285 }, { "epoch": 2.74962962962963, "grad_norm": 51.25, "learning_rate": 9.565939833279192e-07, "log_odds_chosen": 1.294450044631958, "log_odds_ratio": -0.36391139030456543, "logps/chosen": -0.6437335014343262, "logps/rejected": -1.3571479320526123, "loss": 20.5514, "nll_loss": 2.049088716506958, "rewards/accuracies": 0.846875011920929, "rewards/chosen": -0.12874671816825867, "rewards/margins": 0.14268288016319275, "rewards/rejected": -0.2714295983314514, "step": 290 }, { "epoch": 2.797037037037037, "grad_norm": 43.75, "learning_rate": 6.136378865420872e-07, "log_odds_chosen": 1.2758103609085083, "log_odds_ratio": -0.36855071783065796, "logps/chosen": -0.6186591982841492, "logps/rejected": -1.315861701965332, "loss": 19.73, "nll_loss": 2.048844814300537, "rewards/accuracies": 0.8218749761581421, "rewards/chosen": -0.12373185157775879, "rewards/margins": 0.13944050669670105, "rewards/rejected": -0.26317232847213745, "step": 295 }, { "epoch": 2.8444444444444446, "grad_norm": 51.25, "learning_rate": 3.45792591853214e-07, "log_odds_chosen": 1.4028387069702148, "log_odds_ratio": -0.34696242213249207, "logps/chosen": -0.5851832032203674, "logps/rejected": -1.3483918905258179, "loss": 18.6233, "nll_loss": 2.146777629852295, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.11703664064407349, "rewards/margins": 0.15264175832271576, "rewards/rejected": -0.26967838406562805, "step": 300 }, { "epoch": 2.891851851851852, "grad_norm": 108.5, "learning_rate": 1.538830716302092e-07, "log_odds_chosen": 1.4782116413116455, "log_odds_ratio": -0.3342163860797882, "logps/chosen": -0.5890188813209534, "logps/rejected": -1.4168431758880615, "loss": 17.9475, "nll_loss": 2.091174840927124, "rewards/accuracies": 0.875, "rewards/chosen": -0.11780376732349396, "rewards/margins": 0.1655648797750473, "rewards/rejected": -0.28336864709854126, "step": 305 }, { "epoch": 2.9392592592592592, "grad_norm": 40.5, "learning_rate": 3.8500413544415025e-08, "log_odds_chosen": 1.4966692924499512, "log_odds_ratio": -0.3115060031414032, "logps/chosen": -0.5653601288795471, "logps/rejected": -1.383745551109314, "loss": 17.9926, "nll_loss": 2.0637173652648926, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.11307201534509659, "rewards/margins": 0.1636771261692047, "rewards/rejected": -0.2767491042613983, "step": 310 }, { "epoch": 2.986666666666667, "grad_norm": 88.5, "learning_rate": 0.0, "log_odds_chosen": 1.2434196472167969, "log_odds_ratio": -0.36147943139076233, "logps/chosen": -0.605234682559967, "logps/rejected": -1.2406527996063232, "loss": 19.3401, "nll_loss": 2.0648436546325684, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.12104693800210953, "rewards/margins": 0.12708361446857452, "rewards/rejected": -0.24813053011894226, "step": 315 }, { "epoch": 2.986666666666667, "step": 315, "total_flos": 0.0, "train_loss": 75.11768820626395, "train_runtime": 7196.0938, "train_samples_per_second": 2.814, "train_steps_per_second": 0.044 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }