{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9993222089532967, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.447075843811035, "logits/rejected": -2.526996612548828, "logps/chosen": -235.39663696289062, "logps/rejected": -214.08815002441406, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.487886667251587, "logits/rejected": -2.427130699157715, "logps/chosen": -280.10888671875, "logps/rejected": -230.16168212890625, "loss": 0.691, "rewards/accuracies": 0.4722222089767456, "rewards/chosen": 0.0025838064029812813, "rewards/margins": 0.0049818274565041065, "rewards/rejected": -0.0023980215191841125, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.41877818107605, "logits/rejected": -2.356771230697632, "logps/chosen": -255.56265258789062, "logps/rejected": -226.37399291992188, "loss": 0.6932, "rewards/accuracies": 0.503125011920929, "rewards/chosen": 0.001528903958387673, "rewards/margins": 0.0006666237604804337, "rewards/rejected": 0.0008622803725302219, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.42828369140625, "logits/rejected": -2.4059910774230957, "logps/chosen": -272.57012939453125, "logps/rejected": -227.35250854492188, "loss": 0.6945, "rewards/accuracies": 0.4921875, "rewards/chosen": -0.001070805243216455, "rewards/margins": -0.0018140410538762808, "rewards/rejected": 0.000743235694244504, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.425325870513916, "logits/rejected": -2.374124050140381, "logps/chosen": -249.1795654296875, "logps/rejected": -220.6439971923828, "loss": 0.6935, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.0025015759747475386, "rewards/margins": 8.866000280249864e-05, "rewards/rejected": 0.0024129163939505816, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.4614310264587402, "logits/rejected": -2.416882038116455, "logps/chosen": -259.7109680175781, "logps/rejected": -220.2974090576172, "loss": 0.6917, "rewards/accuracies": 0.5078125, "rewards/chosen": 0.0015415346715599298, "rewards/margins": 0.003707319498062134, "rewards/rejected": -0.0021657845936715603, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.462627649307251, "logits/rejected": -2.4049839973449707, "logps/chosen": -259.0118713378906, "logps/rejected": -228.43917846679688, "loss": 0.6927, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.002671582391485572, "rewards/margins": 0.0019277830142527819, "rewards/rejected": 0.0007437997264787555, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.4417717456817627, "logits/rejected": -2.4220786094665527, "logps/chosen": -267.39825439453125, "logps/rejected": -210.96157836914062, "loss": 0.692, "rewards/accuracies": 0.503125011920929, "rewards/chosen": 0.001063968287780881, "rewards/margins": 0.002977523719891906, "rewards/rejected": -0.001913555315695703, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.453876495361328, "logits/rejected": -2.3886351585388184, "logps/chosen": -280.5273132324219, "logps/rejected": -225.0200653076172, "loss": 0.6929, "rewards/accuracies": 0.534375011920929, "rewards/chosen": 0.0006787125021219254, "rewards/margins": 0.0013104949612170458, "rewards/rejected": -0.0006317828083410859, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.4767956733703613, "logits/rejected": -2.3978798389434814, "logps/chosen": -271.4781799316406, "logps/rejected": -231.6018524169922, "loss": 0.6932, "rewards/accuracies": 0.503125011920929, "rewards/chosen": -0.00017936174117494375, "rewards/margins": 0.0006834475207142532, "rewards/rejected": -0.0008628091891296208, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.4933345317840576, "logits/rejected": -2.397916555404663, "logps/chosen": -265.00872802734375, "logps/rejected": -215.407470703125, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0026833172887563705, "rewards/margins": 0.003812385257333517, "rewards/rejected": -0.0011290680849924684, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.4396605491638184, "logits/rejected": -2.366703748703003, "logps/chosen": -283.7935791015625, "logps/rejected": -214.5601806640625, "loss": 0.6894, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.0025542343501001596, "rewards/margins": 0.00826872419565916, "rewards/rejected": -0.005714490078389645, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.4569156169891357, "logits/rejected": -2.429029703140259, "logps/chosen": -271.7438049316406, "logps/rejected": -229.4224395751953, "loss": 0.6882, "rewards/accuracies": 0.5625, "rewards/chosen": 0.004260816611349583, "rewards/margins": 0.010780954733490944, "rewards/rejected": -0.0065201385878026485, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.452051877975464, "logits/rejected": -2.3855373859405518, "logps/chosen": -267.55743408203125, "logps/rejected": -212.14273071289062, "loss": 0.691, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0025894823484122753, "rewards/margins": 0.005025609862059355, "rewards/rejected": -0.0024361279793083668, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.4718971252441406, "logits/rejected": -2.417950391769409, "logps/chosen": -274.26593017578125, "logps/rejected": -212.1128692626953, "loss": 0.6894, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.0036728009581565857, "rewards/margins": 0.008318398147821426, "rewards/rejected": -0.004645597655326128, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.419431209564209, "logits/rejected": -2.3849945068359375, "logps/chosen": -250.10806274414062, "logps/rejected": -210.3776397705078, "loss": 0.6897, "rewards/accuracies": 0.542187511920929, "rewards/chosen": 0.0029598295222967863, "rewards/margins": 0.007620878517627716, "rewards/rejected": -0.004661048296838999, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.4403343200683594, "logits/rejected": -2.378030776977539, "logps/chosen": -267.47332763671875, "logps/rejected": -218.4069061279297, "loss": 0.6891, "rewards/accuracies": 0.546875, "rewards/chosen": 0.0031638103537261486, "rewards/margins": 0.009145173244178295, "rewards/rejected": -0.0059813628904521465, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.4039931297302246, "logits/rejected": -2.3714652061462402, "logps/chosen": -277.943359375, "logps/rejected": -221.7199249267578, "loss": 0.6873, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.008871063590049744, "rewards/margins": 0.012961235828697681, "rewards/rejected": -0.004090171307325363, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.41255784034729, "logits/rejected": -2.382023572921753, "logps/chosen": -271.4554443359375, "logps/rejected": -226.9301300048828, "loss": 0.6874, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.005444863811135292, "rewards/margins": 0.01286339946091175, "rewards/rejected": -0.007418536581099033, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.4459285736083984, "logits/rejected": -2.394118547439575, "logps/chosen": -276.55389404296875, "logps/rejected": -222.62655639648438, "loss": 0.6821, "rewards/accuracies": 0.625, "rewards/chosen": 0.01423065084964037, "rewards/margins": 0.02362729236483574, "rewards/rejected": -0.009396640583872795, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -2.4238436222076416, "logits/rejected": -2.393543243408203, "logps/chosen": -249.68899536132812, "logps/rejected": -214.36233520507812, "loss": 0.6785, "rewards/accuracies": 0.625, "rewards/chosen": 0.014417588710784912, "rewards/margins": 0.0309614147990942, "rewards/rejected": -0.01654382422566414, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.4502434730529785, "logits/rejected": -2.4075448513031006, "logps/chosen": -270.61175537109375, "logps/rejected": -235.2810516357422, "loss": 0.6815, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.01293298788368702, "rewards/margins": 0.025450533255934715, "rewards/rejected": -0.01251754630357027, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.414132595062256, "logits/rejected": -2.364130735397339, "logps/chosen": -263.3313903808594, "logps/rejected": -219.0230712890625, "loss": 0.6793, "rewards/accuracies": 0.604687511920929, "rewards/chosen": 0.014896327629685402, "rewards/margins": 0.030013080686330795, "rewards/rejected": -0.015116755850613117, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.4107182025909424, "logits/rejected": -2.3757405281066895, "logps/chosen": -273.1572265625, "logps/rejected": -231.4423065185547, "loss": 0.6761, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.015099003911018372, "rewards/margins": 0.037129949778318405, "rewards/rejected": -0.022030945867300034, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.4387900829315186, "logits/rejected": -2.396888256072998, "logps/chosen": -271.6656799316406, "logps/rejected": -233.677734375, "loss": 0.6727, "rewards/accuracies": 0.65625, "rewards/chosen": 0.021000446751713753, "rewards/margins": 0.04467698931694031, "rewards/rejected": -0.023676546290516853, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.431246519088745, "logits/rejected": -2.461184501647949, "logps/chosen": -264.908447265625, "logps/rejected": -225.65451049804688, "loss": 0.6699, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": 0.022870570421218872, "rewards/margins": 0.05118563771247864, "rewards/rejected": -0.028315063565969467, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.409027576446533, "logits/rejected": -2.4082815647125244, "logps/chosen": -249.64242553710938, "logps/rejected": -204.5191650390625, "loss": 0.6666, "rewards/accuracies": 0.676562488079071, "rewards/chosen": 0.02077900990843773, "rewards/margins": 0.05811852216720581, "rewards/rejected": -0.03733951598405838, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.4640724658966064, "logits/rejected": -2.438767910003662, "logps/chosen": -281.8011169433594, "logps/rejected": -224.46932983398438, "loss": 0.6628, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.025280708447098732, "rewards/margins": 0.06713660806417465, "rewards/rejected": -0.04185590520501137, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.441326141357422, "logits/rejected": -2.3782386779785156, "logps/chosen": -266.28228759765625, "logps/rejected": -217.6759796142578, "loss": 0.6539, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": 0.02818796969950199, "rewards/margins": 0.08737680315971375, "rewards/rejected": -0.059188831597566605, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.4530272483825684, "logits/rejected": -2.4197421073913574, "logps/chosen": -251.4274444580078, "logps/rejected": -206.58395385742188, "loss": 0.6597, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.019450683146715164, "rewards/margins": 0.07725103944540024, "rewards/rejected": -0.05780036002397537, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -2.4357409477233887, "logits/rejected": -2.401296615600586, "logps/chosen": -258.9688415527344, "logps/rejected": -214.4955291748047, "loss": 0.6529, "rewards/accuracies": 0.6796875, "rewards/chosen": 0.02522132731974125, "rewards/margins": 0.09243801981210709, "rewards/rejected": -0.06721669435501099, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -2.433469772338867, "logits/rejected": -2.397340774536133, "logps/chosen": -264.56365966796875, "logps/rejected": -223.6669464111328, "loss": 0.6494, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.027543241158127785, "rewards/margins": 0.10247315466403961, "rewards/rejected": -0.07492991536855698, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -2.4279608726501465, "logits/rejected": -2.383455514907837, "logps/chosen": -268.522216796875, "logps/rejected": -215.8023223876953, "loss": 0.643, "rewards/accuracies": 0.723437488079071, "rewards/chosen": 0.032382432371377945, "rewards/margins": 0.11742101609706879, "rewards/rejected": -0.08503858745098114, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -2.483980178833008, "logits/rejected": -2.4091663360595703, "logps/chosen": -266.2663879394531, "logps/rejected": -230.7337188720703, "loss": 0.6403, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03822886198759079, "rewards/margins": 0.12609949707984924, "rewards/rejected": -0.08787062764167786, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -2.437373161315918, "logits/rejected": -2.3692476749420166, "logps/chosen": -252.1580047607422, "logps/rejected": -221.46554565429688, "loss": 0.6414, "rewards/accuracies": 0.682812511920929, "rewards/chosen": 0.034671518951654434, "rewards/margins": 0.12736742198467255, "rewards/rejected": -0.09269589185714722, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -2.457171678543091, "logits/rejected": -2.3946237564086914, "logps/chosen": -263.380615234375, "logps/rejected": -218.726318359375, "loss": 0.6377, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.023257676512002945, "rewards/margins": 0.13810031116008759, "rewards/rejected": -0.11484263837337494, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -2.4557504653930664, "logits/rejected": -2.4013724327087402, "logps/chosen": -267.2643737792969, "logps/rejected": -222.85366821289062, "loss": 0.6286, "rewards/accuracies": 0.6484375, "rewards/chosen": 0.03796042129397392, "rewards/margins": 0.160946324467659, "rewards/rejected": -0.12298589944839478, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -2.4332971572875977, "logits/rejected": -2.421247959136963, "logps/chosen": -266.8581237792969, "logps/rejected": -235.67788696289062, "loss": 0.6366, "rewards/accuracies": 0.6734374761581421, "rewards/chosen": 0.01746644265949726, "rewards/margins": 0.14841753244400024, "rewards/rejected": -0.13095109164714813, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -2.417196750640869, "logits/rejected": -2.37961483001709, "logps/chosen": -261.7236633300781, "logps/rejected": -229.08639526367188, "loss": 0.6354, "rewards/accuracies": 0.651562511920929, "rewards/chosen": 0.01633612811565399, "rewards/margins": 0.1533532738685608, "rewards/rejected": -0.1370171457529068, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -2.4581520557403564, "logits/rejected": -2.3880105018615723, "logps/chosen": -263.3890686035156, "logps/rejected": -218.2093505859375, "loss": 0.6132, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.046173859387636185, "rewards/margins": 0.2001974880695343, "rewards/rejected": -0.15402361750602722, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -2.4509260654449463, "logits/rejected": -2.4113948345184326, "logps/chosen": -270.0736083984375, "logps/rejected": -221.9901123046875, "loss": 0.6236, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.04222818464040756, "rewards/margins": 0.1874697059392929, "rewards/rejected": -0.14524152874946594, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -2.4471030235290527, "logits/rejected": -2.4141643047332764, "logps/chosen": -261.27337646484375, "logps/rejected": -230.60299682617188, "loss": 0.6294, "rewards/accuracies": 0.6546875238418579, "rewards/chosen": 0.021040040999650955, "rewards/margins": 0.18236112594604492, "rewards/rejected": -0.16132107377052307, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -2.460665464401245, "logits/rejected": -2.4335570335388184, "logps/chosen": -276.3302917480469, "logps/rejected": -226.70639038085938, "loss": 0.6125, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": 0.026431281119585037, "rewards/margins": 0.2242995798587799, "rewards/rejected": -0.19786831736564636, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -2.4207959175109863, "logits/rejected": -2.383884906768799, "logps/chosen": -250.6901397705078, "logps/rejected": -207.92062377929688, "loss": 0.6128, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.01974855735898018, "rewards/margins": 0.21391530334949493, "rewards/rejected": -0.19416674971580505, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -2.457104206085205, "logits/rejected": -2.3864612579345703, "logps/chosen": -274.47650146484375, "logps/rejected": -225.985107421875, "loss": 0.6012, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": 0.04322098195552826, "rewards/margins": 0.24149248003959656, "rewards/rejected": -0.19827154278755188, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -2.4684674739837646, "logits/rejected": -2.432194948196411, "logps/chosen": -262.0184020996094, "logps/rejected": -226.8969268798828, "loss": 0.6132, "rewards/accuracies": 0.6703125238418579, "rewards/chosen": 0.012870723381638527, "rewards/margins": 0.226064994931221, "rewards/rejected": -0.21319429576396942, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -2.4258971214294434, "logits/rejected": -2.3564021587371826, "logps/chosen": -256.30084228515625, "logps/rejected": -219.12112426757812, "loss": 0.6188, "rewards/accuracies": 0.6734374761581421, "rewards/chosen": -0.004575688857585192, "rewards/margins": 0.21516656875610352, "rewards/rejected": -0.21974226832389832, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -2.4722161293029785, "logits/rejected": -2.4338574409484863, "logps/chosen": -279.41644287109375, "logps/rejected": -232.3635711669922, "loss": 0.6072, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.012542584910988808, "rewards/margins": 0.2474808394908905, "rewards/rejected": -0.23493823409080505, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -2.381640911102295, "logits/rejected": -2.4078078269958496, "logps/chosen": -262.5255126953125, "logps/rejected": -226.96853637695312, "loss": 0.5953, "rewards/accuracies": 0.707812488079071, "rewards/chosen": 0.03892933949828148, "rewards/margins": 0.2794772982597351, "rewards/rejected": -0.24054794013500214, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -2.436652421951294, "logits/rejected": -2.3565993309020996, "logps/chosen": -263.81829833984375, "logps/rejected": -223.61801147460938, "loss": 0.5897, "rewards/accuracies": 0.723437488079071, "rewards/chosen": 0.016417725011706352, "rewards/margins": 0.2946879267692566, "rewards/rejected": -0.2782701849937439, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -2.3938071727752686, "logits/rejected": -2.37441086769104, "logps/chosen": -273.1866760253906, "logps/rejected": -219.1422576904297, "loss": 0.5987, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": 0.022179026156663895, "rewards/margins": 0.27144354581832886, "rewards/rejected": -0.24926450848579407, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -2.438375473022461, "logits/rejected": -2.4063642024993896, "logps/chosen": -268.3760681152344, "logps/rejected": -213.6297607421875, "loss": 0.5779, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.025703424587845802, "rewards/margins": 0.3301311433315277, "rewards/rejected": -0.30442774295806885, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -2.4285857677459717, "logits/rejected": -2.3742969036102295, "logps/chosen": -270.7893371582031, "logps/rejected": -229.7726593017578, "loss": 0.5886, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.017922762781381607, "rewards/margins": 0.3229644298553467, "rewards/rejected": -0.305041640996933, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -2.4130568504333496, "logits/rejected": -2.3629188537597656, "logps/chosen": -272.3194885253906, "logps/rejected": -231.18997192382812, "loss": 0.5947, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.015474101528525352, "rewards/margins": 0.3130945861339569, "rewards/rejected": -0.2976204752922058, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -2.443058490753174, "logits/rejected": -2.3707220554351807, "logps/chosen": -265.5616760253906, "logps/rejected": -224.46688842773438, "loss": 0.5945, "rewards/accuracies": 0.6796875, "rewards/chosen": -0.0283407811075449, "rewards/margins": 0.30165895819664, "rewards/rejected": -0.32999974489212036, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -2.459993362426758, "logits/rejected": -2.4190433025360107, "logps/chosen": -262.33197021484375, "logps/rejected": -231.3585662841797, "loss": 0.5988, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.00041465210961177945, "rewards/margins": 0.29224497079849243, "rewards/rejected": -0.29183030128479004, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -2.3841280937194824, "logits/rejected": -2.3862245082855225, "logps/chosen": -268.51177978515625, "logps/rejected": -231.3872833251953, "loss": 0.5903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.008536433801054955, "rewards/margins": 0.32063713669776917, "rewards/rejected": -0.31210070848464966, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -2.4563305377960205, "logits/rejected": -2.423436403274536, "logps/chosen": -267.9896545410156, "logps/rejected": -222.6366729736328, "loss": 0.5746, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": 0.011319964192807674, "rewards/margins": 0.36020052433013916, "rewards/rejected": -0.3488805890083313, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -2.4537854194641113, "logits/rejected": -2.3811707496643066, "logps/chosen": -270.5040588378906, "logps/rejected": -231.43017578125, "loss": 0.574, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": 0.019544053822755814, "rewards/margins": 0.3704259693622589, "rewards/rejected": -0.3508819341659546, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -2.447350025177002, "logits/rejected": -2.388247013092041, "logps/chosen": -271.6213684082031, "logps/rejected": -223.79696655273438, "loss": 0.5809, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": 0.0032621710561215878, "rewards/margins": 0.367009699344635, "rewards/rejected": -0.3637475371360779, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -2.457573175430298, "logits/rejected": -2.429401397705078, "logps/chosen": -266.47222900390625, "logps/rejected": -231.684814453125, "loss": 0.5816, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.007931029424071312, "rewards/margins": 0.3559093475341797, "rewards/rejected": -0.36384040117263794, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -2.4467155933380127, "logits/rejected": -2.4398138523101807, "logps/chosen": -280.1789855957031, "logps/rejected": -237.6522216796875, "loss": 0.5711, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.0163104385137558, "rewards/margins": 0.3898230493068695, "rewards/rejected": -0.3735126256942749, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -2.4456491470336914, "logits/rejected": -2.397401809692383, "logps/chosen": -257.31146240234375, "logps/rejected": -213.8458709716797, "loss": 0.5746, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": 0.0058257849887013435, "rewards/margins": 0.402109295129776, "rewards/rejected": -0.39628344774246216, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -2.455310821533203, "logits/rejected": -2.4044442176818848, "logps/chosen": -269.50531005859375, "logps/rejected": -223.09915161132812, "loss": 0.5824, "rewards/accuracies": 0.703125, "rewards/chosen": -0.03774386644363403, "rewards/margins": 0.37117189168930054, "rewards/rejected": -0.40891575813293457, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -2.429537773132324, "logits/rejected": -2.4004569053649902, "logps/chosen": -278.3745422363281, "logps/rejected": -238.91348266601562, "loss": 0.5602, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.02088163048028946, "rewards/margins": 0.4280461370944977, "rewards/rejected": -0.4489278197288513, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -2.462010622024536, "logits/rejected": -2.382342576980591, "logps/chosen": -274.82489013671875, "logps/rejected": -228.21871948242188, "loss": 0.5689, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.027593884617090225, "rewards/margins": 0.4239775538444519, "rewards/rejected": -0.451571524143219, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -2.408452033996582, "logits/rejected": -2.367763042449951, "logps/chosen": -279.24713134765625, "logps/rejected": -234.92257690429688, "loss": 0.5812, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.029710102826356888, "rewards/margins": 0.3825686275959015, "rewards/rejected": -0.4122787117958069, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -2.400705575942993, "logits/rejected": -2.386396884918213, "logps/chosen": -265.80059814453125, "logps/rejected": -221.22183227539062, "loss": 0.5795, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.01276162825524807, "rewards/margins": 0.3836382031440735, "rewards/rejected": -0.3963998258113861, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -2.434727191925049, "logits/rejected": -2.3701629638671875, "logps/chosen": -265.0262145996094, "logps/rejected": -230.69918823242188, "loss": 0.5857, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.052033863961696625, "rewards/margins": 0.3843652307987213, "rewards/rejected": -0.43639907240867615, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -2.4166369438171387, "logits/rejected": -2.3753108978271484, "logps/chosen": -263.7073059082031, "logps/rejected": -228.69186401367188, "loss": 0.5598, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.031519632786512375, "rewards/margins": 0.4231399893760681, "rewards/rejected": -0.454659640789032, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -2.494065761566162, "logits/rejected": -2.3916873931884766, "logps/chosen": -277.77325439453125, "logps/rejected": -226.1985321044922, "loss": 0.5795, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.04932181164622307, "rewards/margins": 0.41205042600631714, "rewards/rejected": -0.4613722264766693, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -2.431324005126953, "logits/rejected": -2.4029393196105957, "logps/chosen": -280.7895812988281, "logps/rejected": -239.1829833984375, "loss": 0.584, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.054767437279224396, "rewards/margins": 0.40931397676467896, "rewards/rejected": -0.46408137679100037, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -2.4472877979278564, "logits/rejected": -2.357172727584839, "logps/chosen": -252.1331329345703, "logps/rejected": -216.9487762451172, "loss": 0.5796, "rewards/accuracies": 0.692187488079071, "rewards/chosen": -0.049075834453105927, "rewards/margins": 0.3829793632030487, "rewards/rejected": -0.43205517530441284, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -2.3492183685302734, "logits/rejected": -2.34523606300354, "logps/chosen": -248.2432403564453, "logps/rejected": -214.99880981445312, "loss": 0.5851, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07765182107686996, "rewards/margins": 0.3819560408592224, "rewards/rejected": -0.45960789918899536, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -2.3994088172912598, "logits/rejected": -2.3783352375030518, "logps/chosen": -246.6106719970703, "logps/rejected": -206.70840454101562, "loss": 0.5663, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.04350767284631729, "rewards/margins": 0.439382404088974, "rewards/rejected": -0.4828900694847107, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -2.3943963050842285, "logits/rejected": -2.3858072757720947, "logps/chosen": -266.1705627441406, "logps/rejected": -225.35940551757812, "loss": 0.5473, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.010749602690339088, "rewards/margins": 0.48278599977493286, "rewards/rejected": -0.49353551864624023, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -2.470837354660034, "logits/rejected": -2.391634464263916, "logps/chosen": -269.9073791503906, "logps/rejected": -229.0169677734375, "loss": 0.5639, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05836876481771469, "rewards/margins": 0.463728666305542, "rewards/rejected": -0.5220974087715149, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -2.399672746658325, "logits/rejected": -2.386239528656006, "logps/chosen": -265.0301513671875, "logps/rejected": -216.77737426757812, "loss": 0.5693, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.05191594362258911, "rewards/margins": 0.4625419080257416, "rewards/rejected": -0.5144578218460083, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -2.4455151557922363, "logits/rejected": -2.3676414489746094, "logps/chosen": -263.79571533203125, "logps/rejected": -222.31787109375, "loss": 0.5713, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.07387879490852356, "rewards/margins": 0.4389980435371399, "rewards/rejected": -0.5128768086433411, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -2.4102261066436768, "logits/rejected": -2.353691577911377, "logps/chosen": -277.6340026855469, "logps/rejected": -241.7203826904297, "loss": 0.5791, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08135993033647537, "rewards/margins": 0.44334641098976135, "rewards/rejected": -0.524706244468689, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -2.430101156234741, "logits/rejected": -2.385629177093506, "logps/chosen": -267.8277587890625, "logps/rejected": -237.6192169189453, "loss": 0.5621, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.09289533644914627, "rewards/margins": 0.47283419966697693, "rewards/rejected": -0.5657294988632202, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -2.424495220184326, "logits/rejected": -2.3845698833465576, "logps/chosen": -265.8463134765625, "logps/rejected": -226.7728729248047, "loss": 0.5831, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.05096619576215744, "rewards/margins": 0.43721461296081543, "rewards/rejected": -0.4881807863712311, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -2.4253883361816406, "logits/rejected": -2.3850014209747314, "logps/chosen": -272.5957946777344, "logps/rejected": -233.60498046875, "loss": 0.5633, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.06406211853027344, "rewards/margins": 0.48712214827537537, "rewards/rejected": -0.5511842370033264, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -2.4437859058380127, "logits/rejected": -2.3819785118103027, "logps/chosen": -264.5028381347656, "logps/rejected": -227.0218048095703, "loss": 0.5708, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.06855222582817078, "rewards/margins": 0.4592631459236145, "rewards/rejected": -0.5278154015541077, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -2.427250385284424, "logits/rejected": -2.3620200157165527, "logps/chosen": -254.1734161376953, "logps/rejected": -229.9873046875, "loss": 0.5823, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.0878441333770752, "rewards/margins": 0.43157902359962463, "rewards/rejected": -0.5194231271743774, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -2.4152088165283203, "logits/rejected": -2.399456024169922, "logps/chosen": -268.06689453125, "logps/rejected": -232.3248748779297, "loss": 0.5626, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.09431511908769608, "rewards/margins": 0.4891575872898102, "rewards/rejected": -0.5834725499153137, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -2.461259126663208, "logits/rejected": -2.4431066513061523, "logps/chosen": -260.641845703125, "logps/rejected": -233.93637084960938, "loss": 0.5791, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.07183202356100082, "rewards/margins": 0.44370943307876587, "rewards/rejected": -0.5155414342880249, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -2.423548698425293, "logits/rejected": -2.3617987632751465, "logps/chosen": -264.8348083496094, "logps/rejected": -234.61605834960938, "loss": 0.5649, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.08685998618602753, "rewards/margins": 0.47184914350509644, "rewards/rejected": -0.5587090849876404, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -2.4239916801452637, "logits/rejected": -2.3515267372131348, "logps/chosen": -258.3001403808594, "logps/rejected": -219.2425079345703, "loss": 0.5832, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.12072154134511948, "rewards/margins": 0.434969425201416, "rewards/rejected": -0.5556910037994385, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -2.4307010173797607, "logits/rejected": -2.3626708984375, "logps/chosen": -283.5355224609375, "logps/rejected": -235.6796417236328, "loss": 0.5489, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.06075868755578995, "rewards/margins": 0.5323190689086914, "rewards/rejected": -0.5930777788162231, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -2.445495128631592, "logits/rejected": -2.368015766143799, "logps/chosen": -273.6012878417969, "logps/rejected": -237.4881134033203, "loss": 0.5823, "rewards/accuracies": 0.6796875, "rewards/chosen": -0.09077002108097076, "rewards/margins": 0.45550060272216797, "rewards/rejected": -0.5462706685066223, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -2.431802988052368, "logits/rejected": -2.3802406787872314, "logps/chosen": -268.13336181640625, "logps/rejected": -231.0006561279297, "loss": 0.5636, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.08810480684041977, "rewards/margins": 0.49154072999954224, "rewards/rejected": -0.579645574092865, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -2.4426496028900146, "logits/rejected": -2.385349750518799, "logps/chosen": -276.5380859375, "logps/rejected": -233.1389617919922, "loss": 0.5482, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.05111056566238403, "rewards/margins": 0.542784571647644, "rewards/rejected": -0.5938950777053833, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -2.383472204208374, "logits/rejected": -2.399059295654297, "logps/chosen": -272.26556396484375, "logps/rejected": -223.87905883789062, "loss": 0.5578, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.09871871769428253, "rewards/margins": 0.5212680101394653, "rewards/rejected": -0.6199867129325867, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -2.4192233085632324, "logits/rejected": -2.3954081535339355, "logps/chosen": -273.0626525878906, "logps/rejected": -239.1441192626953, "loss": 0.5488, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.10023512691259384, "rewards/margins": 0.5407330989837646, "rewards/rejected": -0.6409682035446167, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -2.406310558319092, "logits/rejected": -2.383169651031494, "logps/chosen": -268.0104064941406, "logps/rejected": -233.89749145507812, "loss": 0.57, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": -0.09341312944889069, "rewards/margins": 0.4971606135368347, "rewards/rejected": -0.5905737280845642, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -2.4181647300720215, "logits/rejected": -2.3776590824127197, "logps/chosen": -284.4306640625, "logps/rejected": -238.550537109375, "loss": 0.5654, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.09716256707906723, "rewards/margins": 0.506054699420929, "rewards/rejected": -0.6032172441482544, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -2.079043388366699, "eval_logits/rejected": -2.0256688594818115, "eval_logps/chosen": -265.5612487792969, "eval_logps/rejected": -229.98611450195312, "eval_loss": 0.5545315742492676, "eval_rewards/accuracies": 0.7160000205039978, "eval_rewards/chosen": -0.09934788197278976, "eval_rewards/margins": 0.5339328050613403, "eval_rewards/rejected": -0.6332806348800659, "eval_runtime": 1088.7146, "eval_samples_per_second": 1.837, "eval_steps_per_second": 0.459, "step": 968 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -2.449903964996338, "logits/rejected": -2.3904850482940674, "logps/chosen": -269.0638732910156, "logps/rejected": -230.3978271484375, "loss": 0.5468, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.05560935288667679, "rewards/margins": 0.5521260499954224, "rewards/rejected": -0.6077354550361633, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -2.4261183738708496, "logits/rejected": -2.3550448417663574, "logps/chosen": -267.64080810546875, "logps/rejected": -227.04812622070312, "loss": 0.545, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.09351503103971481, "rewards/margins": 0.5480056405067444, "rewards/rejected": -0.641520619392395, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -2.4298439025878906, "logits/rejected": -2.3898258209228516, "logps/chosen": -277.6336364746094, "logps/rejected": -225.4404296875, "loss": 0.5438, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.08874578773975372, "rewards/margins": 0.5600773096084595, "rewards/rejected": -0.6488231420516968, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -2.440823793411255, "logits/rejected": -2.3596456050872803, "logps/chosen": -280.1471862792969, "logps/rejected": -238.19503784179688, "loss": 0.5523, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09118635207414627, "rewards/margins": 0.556471049785614, "rewards/rejected": -0.6476574540138245, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -2.4096405506134033, "logits/rejected": -2.34090256690979, "logps/chosen": -257.96527099609375, "logps/rejected": -223.86474609375, "loss": 0.5731, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.13044361770153046, "rewards/margins": 0.49574214220046997, "rewards/rejected": -0.6261857151985168, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -2.436314105987549, "logits/rejected": -2.3611092567443848, "logps/chosen": -272.37335205078125, "logps/rejected": -231.5602264404297, "loss": 0.5459, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.07726888358592987, "rewards/margins": 0.5683926343917847, "rewards/rejected": -0.645661473274231, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -2.4350028038024902, "logits/rejected": -2.3586974143981934, "logps/chosen": -275.226806640625, "logps/rejected": -223.6283721923828, "loss": 0.5453, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06087593361735344, "rewards/margins": 0.5964738130569458, "rewards/rejected": -0.6573497653007507, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -2.400864601135254, "logits/rejected": -2.3652467727661133, "logps/chosen": -255.86477661132812, "logps/rejected": -222.24752807617188, "loss": 0.5751, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.1341889202594757, "rewards/margins": 0.5123754739761353, "rewards/rejected": -0.6465644240379333, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -2.3898608684539795, "logits/rejected": -2.379241466522217, "logps/chosen": -261.6153869628906, "logps/rejected": -223.2140655517578, "loss": 0.5499, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.08567940443754196, "rewards/margins": 0.5808093547821045, "rewards/rejected": -0.666488766670227, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -2.4234771728515625, "logits/rejected": -2.4022397994995117, "logps/chosen": -280.1412048339844, "logps/rejected": -242.2364959716797, "loss": 0.5684, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.11185096204280853, "rewards/margins": 0.5072935223579407, "rewards/rejected": -0.6191444993019104, "step": 1060 }, { "epoch": 1.11, "learning_rate": 3.50937619594336e-07, "logits/chosen": -2.4101145267486572, "logits/rejected": -2.34965181350708, "logps/chosen": -268.9992370605469, "logps/rejected": -218.4785614013672, "loss": 0.54, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.09902816265821457, "rewards/margins": 0.6141443252563477, "rewards/rejected": -0.7131724953651428, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -2.4058384895324707, "logits/rejected": -2.3813834190368652, "logps/chosen": -261.517333984375, "logps/rejected": -220.6446990966797, "loss": 0.5651, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.11156700551509857, "rewards/margins": 0.5487754940986633, "rewards/rejected": -0.6603423357009888, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -2.4069576263427734, "logits/rejected": -2.3752903938293457, "logps/chosen": -265.21124267578125, "logps/rejected": -223.32421875, "loss": 0.5423, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1168740764260292, "rewards/margins": 0.5669043064117432, "rewards/rejected": -0.6837784051895142, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -2.4091246128082275, "logits/rejected": -2.359158515930176, "logps/chosen": -261.7292175292969, "logps/rejected": -225.2208709716797, "loss": 0.5656, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.12864422798156738, "rewards/margins": 0.5372087955474854, "rewards/rejected": -0.6658530831336975, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -2.466919183731079, "logits/rejected": -2.3888449668884277, "logps/chosen": -277.5549011230469, "logps/rejected": -240.7705841064453, "loss": 0.5392, "rewards/accuracies": 0.734375, "rewards/chosen": -0.05221106857061386, "rewards/margins": 0.6037675738334656, "rewards/rejected": -0.6559786796569824, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -2.4204328060150146, "logits/rejected": -2.3684065341949463, "logps/chosen": -269.5735778808594, "logps/rejected": -219.9510498046875, "loss": 0.5382, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09449413418769836, "rewards/margins": 0.6241403818130493, "rewards/rejected": -0.7186344861984253, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -2.431792736053467, "logits/rejected": -2.3539392948150635, "logps/chosen": -279.9765930175781, "logps/rejected": -239.1553955078125, "loss": 0.5447, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.07704336196184158, "rewards/margins": 0.6232292652130127, "rewards/rejected": -0.7002726197242737, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -2.404470920562744, "logits/rejected": -2.3776755332946777, "logps/chosen": -256.79559326171875, "logps/rejected": -227.1933135986328, "loss": 0.5584, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.11403951793909073, "rewards/margins": 0.545345664024353, "rewards/rejected": -0.6593851447105408, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -2.357815980911255, "logits/rejected": -2.331373691558838, "logps/chosen": -253.86587524414062, "logps/rejected": -217.3060760498047, "loss": 0.5472, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.10227999836206436, "rewards/margins": 0.5686275362968445, "rewards/rejected": -0.6709075570106506, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -2.409895420074463, "logits/rejected": -2.3179931640625, "logps/chosen": -266.31640625, "logps/rejected": -218.92160034179688, "loss": 0.5432, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.10928237438201904, "rewards/margins": 0.6051470041275024, "rewards/rejected": -0.7144292593002319, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -2.4335556030273438, "logits/rejected": -2.3714287281036377, "logps/chosen": -277.588623046875, "logps/rejected": -233.22079467773438, "loss": 0.5461, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.14634881913661957, "rewards/margins": 0.5770747661590576, "rewards/rejected": -0.7234236001968384, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -2.4803996086120605, "logits/rejected": -2.409782886505127, "logps/chosen": -267.01678466796875, "logps/rejected": -247.248291015625, "loss": 0.5607, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1365794688463211, "rewards/margins": 0.5790367126464844, "rewards/rejected": -0.7156162261962891, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -2.3643290996551514, "logits/rejected": -2.3453285694122314, "logps/chosen": -260.19134521484375, "logps/rejected": -226.71481323242188, "loss": 0.5624, "rewards/accuracies": 0.698437511920929, "rewards/chosen": -0.1081305742263794, "rewards/margins": 0.571107029914856, "rewards/rejected": -0.6792376637458801, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -2.3916163444519043, "logits/rejected": -2.358982563018799, "logps/chosen": -260.97052001953125, "logps/rejected": -222.0037078857422, "loss": 0.5276, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10513798892498016, "rewards/margins": 0.6672986149787903, "rewards/rejected": -0.7724366188049316, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -2.3794853687286377, "logits/rejected": -2.3386852741241455, "logps/chosen": -257.59130859375, "logps/rejected": -222.3406219482422, "loss": 0.5545, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.13189749419689178, "rewards/margins": 0.5676501393318176, "rewards/rejected": -0.6995476484298706, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -2.384241819381714, "logits/rejected": -2.3438777923583984, "logps/chosen": -272.30767822265625, "logps/rejected": -231.92471313476562, "loss": 0.5362, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1082894578576088, "rewards/margins": 0.6236446499824524, "rewards/rejected": -0.731934130191803, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -2.4498887062072754, "logits/rejected": -2.382390260696411, "logps/chosen": -270.11798095703125, "logps/rejected": -228.3955841064453, "loss": 0.5393, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.13679789006710052, "rewards/margins": 0.6304437518119812, "rewards/rejected": -0.7672415971755981, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -2.3304078578948975, "logits/rejected": -2.328829288482666, "logps/chosen": -244.64791870117188, "logps/rejected": -224.4540252685547, "loss": 0.5672, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.1754181832075119, "rewards/margins": 0.5484617948532104, "rewards/rejected": -0.7238799333572388, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -2.4004368782043457, "logits/rejected": -2.394761562347412, "logps/chosen": -260.854248046875, "logps/rejected": -217.267333984375, "loss": 0.5264, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.09097670018672943, "rewards/margins": 0.6562029123306274, "rewards/rejected": -0.7471795678138733, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -2.420809268951416, "logits/rejected": -2.393630266189575, "logps/chosen": -268.07220458984375, "logps/rejected": -243.11996459960938, "loss": 0.5434, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.12649384140968323, "rewards/margins": 0.5802772641181946, "rewards/rejected": -0.706771194934845, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -2.4304604530334473, "logits/rejected": -2.3626341819763184, "logps/chosen": -274.98638916015625, "logps/rejected": -246.15872192382812, "loss": 0.5499, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.07647743821144104, "rewards/margins": 0.5869981646537781, "rewards/rejected": -0.6634755730628967, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -2.392775774002075, "logits/rejected": -2.3546760082244873, "logps/chosen": -280.3741149902344, "logps/rejected": -248.8837127685547, "loss": 0.5092, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.0769033133983612, "rewards/margins": 0.693490743637085, "rewards/rejected": -0.7703940868377686, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -2.3941831588745117, "logits/rejected": -2.349119186401367, "logps/chosen": -275.7878112792969, "logps/rejected": -238.73684692382812, "loss": 0.5188, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.11901184171438217, "rewards/margins": 0.6922268867492676, "rewards/rejected": -0.8112386465072632, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -2.3558011054992676, "logits/rejected": -2.365652322769165, "logps/chosen": -254.7240447998047, "logps/rejected": -225.5684051513672, "loss": 0.5479, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.11073043197393417, "rewards/margins": 0.6586212515830994, "rewards/rejected": -0.7693516612052917, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -2.400010108947754, "logits/rejected": -2.3430371284484863, "logps/chosen": -282.26483154296875, "logps/rejected": -241.29495239257812, "loss": 0.553, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.1404508799314499, "rewards/margins": 0.5930649042129517, "rewards/rejected": -0.7335157990455627, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -2.429117202758789, "logits/rejected": -2.380638360977173, "logps/chosen": -277.0819396972656, "logps/rejected": -231.4957733154297, "loss": 0.5398, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.10495986044406891, "rewards/margins": 0.6354261040687561, "rewards/rejected": -0.740385890007019, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -2.3560943603515625, "logits/rejected": -2.299285650253296, "logps/chosen": -283.2480773925781, "logps/rejected": -236.7747802734375, "loss": 0.5339, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.15119323134422302, "rewards/margins": 0.6373868584632874, "rewards/rejected": -0.788580060005188, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -2.4186995029449463, "logits/rejected": -2.403923511505127, "logps/chosen": -265.8408203125, "logps/rejected": -224.98312377929688, "loss": 0.5481, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1380973756313324, "rewards/margins": 0.6355406045913696, "rewards/rejected": -0.7736380100250244, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -2.3618245124816895, "logits/rejected": -2.340223550796509, "logps/chosen": -267.28338623046875, "logps/rejected": -229.21469116210938, "loss": 0.5572, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.126164972782135, "rewards/margins": 0.6094905138015747, "rewards/rejected": -0.7356554865837097, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -2.4243083000183105, "logits/rejected": -2.398084878921509, "logps/chosen": -256.0418395996094, "logps/rejected": -234.45346069335938, "loss": 0.5438, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.10923846065998077, "rewards/margins": 0.6375387907028198, "rewards/rejected": -0.7467772364616394, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -2.448951005935669, "logits/rejected": -2.3950791358947754, "logps/chosen": -276.74725341796875, "logps/rejected": -226.75149536132812, "loss": 0.5584, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.17273911833763123, "rewards/margins": 0.5847989916801453, "rewards/rejected": -0.7575381994247437, "step": 1370 }, { "epoch": 1.43, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -2.427473545074463, "logits/rejected": -2.3801541328430176, "logps/chosen": -267.68463134765625, "logps/rejected": -228.783447265625, "loss": 0.5465, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1168685331940651, "rewards/margins": 0.6201252937316895, "rewards/rejected": -0.7369938492774963, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -2.4376165866851807, "logits/rejected": -2.39223051071167, "logps/chosen": -269.39691162109375, "logps/rejected": -226.72702026367188, "loss": 0.5143, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.10355620086193085, "rewards/margins": 0.7153445482254028, "rewards/rejected": -0.8189007639884949, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -2.3847761154174805, "logits/rejected": -2.358484983444214, "logps/chosen": -265.6216125488281, "logps/rejected": -226.9099578857422, "loss": 0.5786, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": -0.1487416923046112, "rewards/margins": 0.582770049571991, "rewards/rejected": -0.7315118312835693, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -2.441329002380371, "logits/rejected": -2.405198335647583, "logps/chosen": -254.2424774169922, "logps/rejected": -216.15487670898438, "loss": 0.5328, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.10592655837535858, "rewards/margins": 0.6519125699996948, "rewards/rejected": -0.7578392624855042, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -2.353024482727051, "logits/rejected": -2.3756861686706543, "logps/chosen": -271.5851135253906, "logps/rejected": -226.3388214111328, "loss": 0.5633, "rewards/accuracies": 0.71875, "rewards/chosen": -0.11950352042913437, "rewards/margins": 0.5914410948753357, "rewards/rejected": -0.710944652557373, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -2.4454894065856934, "logits/rejected": -2.4075827598571777, "logps/chosen": -278.7067565917969, "logps/rejected": -233.1806182861328, "loss": 0.5468, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.1553649604320526, "rewards/margins": 0.6472987532615662, "rewards/rejected": -0.8026638031005859, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -2.41646671295166, "logits/rejected": -2.380006790161133, "logps/chosen": -259.50830078125, "logps/rejected": -234.66000366210938, "loss": 0.5442, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.11458615958690643, "rewards/margins": 0.6320740580558777, "rewards/rejected": -0.7466602325439453, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -2.3705825805664062, "logits/rejected": -2.3389930725097656, "logps/chosen": -264.9784240722656, "logps/rejected": -235.85598754882812, "loss": 0.5239, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.11364629119634628, "rewards/margins": 0.6920466423034668, "rewards/rejected": -0.8056928515434265, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -2.3917994499206543, "logits/rejected": -2.361053705215454, "logps/chosen": -273.13323974609375, "logps/rejected": -237.2021026611328, "loss": 0.5537, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.1213529109954834, "rewards/margins": 0.6237030625343323, "rewards/rejected": -0.7450559735298157, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -2.3829543590545654, "logits/rejected": -2.3598859310150146, "logps/chosen": -256.2921447753906, "logps/rejected": -218.822998046875, "loss": 0.5447, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.09320759773254395, "rewards/margins": 0.628312349319458, "rewards/rejected": -0.721519947052002, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -2.3986709117889404, "logits/rejected": -2.3675730228424072, "logps/chosen": -261.54193115234375, "logps/rejected": -222.29812622070312, "loss": 0.5696, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.17618440091609955, "rewards/margins": 0.6171834468841553, "rewards/rejected": -0.793367862701416, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -2.403079032897949, "logits/rejected": -2.344881057739258, "logps/chosen": -271.6820068359375, "logps/rejected": -223.48422241210938, "loss": 0.5317, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.13215352594852448, "rewards/margins": 0.6836920976638794, "rewards/rejected": -0.8158456683158875, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -2.4172019958496094, "logits/rejected": -2.350555181503296, "logps/chosen": -261.85516357421875, "logps/rejected": -225.0038604736328, "loss": 0.5255, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.08901546150445938, "rewards/margins": 0.6642698049545288, "rewards/rejected": -0.75328528881073, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -2.4008395671844482, "logits/rejected": -2.351348876953125, "logps/chosen": -267.31951904296875, "logps/rejected": -227.8149871826172, "loss": 0.5381, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.13619837164878845, "rewards/margins": 0.6516298055648804, "rewards/rejected": -0.7878280878067017, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -2.348276138305664, "logits/rejected": -2.329331159591675, "logps/chosen": -263.51080322265625, "logps/rejected": -227.07809448242188, "loss": 0.5122, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.10638642311096191, "rewards/margins": 0.7151543498039246, "rewards/rejected": -0.8215408325195312, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -2.4366257190704346, "logits/rejected": -2.379861354827881, "logps/chosen": -279.295166015625, "logps/rejected": -239.08352661132812, "loss": 0.5645, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.15942886471748352, "rewards/margins": 0.6090508103370667, "rewards/rejected": -0.7684796452522278, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -2.364650249481201, "logits/rejected": -2.3203299045562744, "logps/chosen": -261.32708740234375, "logps/rejected": -223.8793487548828, "loss": 0.5457, "rewards/accuracies": 0.71875, "rewards/chosen": -0.13680413365364075, "rewards/margins": 0.6616954207420349, "rewards/rejected": -0.798499584197998, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -2.414820432662964, "logits/rejected": -2.3798413276672363, "logps/chosen": -281.36065673828125, "logps/rejected": -240.29238891601562, "loss": 0.5368, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.08666771650314331, "rewards/margins": 0.6870118975639343, "rewards/rejected": -0.7736796140670776, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -2.407886266708374, "logits/rejected": -2.3671507835388184, "logps/chosen": -281.9557189941406, "logps/rejected": -232.68588256835938, "loss": 0.5316, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.127783864736557, "rewards/margins": 0.7107834219932556, "rewards/rejected": -0.8385672569274902, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -2.37595796585083, "logits/rejected": -2.3402533531188965, "logps/chosen": -275.40106201171875, "logps/rejected": -241.32421875, "loss": 0.5485, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.13988900184631348, "rewards/margins": 0.6486446261405945, "rewards/rejected": -0.788533627986908, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -2.383958578109741, "logits/rejected": -2.3686203956604004, "logps/chosen": -273.81549072265625, "logps/rejected": -226.5820770263672, "loss": 0.5231, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.16685205698013306, "rewards/margins": 0.6781736016273499, "rewards/rejected": -0.8450256586074829, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -2.4135966300964355, "logits/rejected": -2.339186429977417, "logps/chosen": -261.7090759277344, "logps/rejected": -232.47018432617188, "loss": 0.5607, "rewards/accuracies": 0.703125, "rewards/chosen": -0.18446998298168182, "rewards/margins": 0.6322883367538452, "rewards/rejected": -0.8167583346366882, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -2.4073891639709473, "logits/rejected": -2.3973593711853027, "logps/chosen": -263.8055725097656, "logps/rejected": -233.00167846679688, "loss": 0.5567, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.12571503221988678, "rewards/margins": 0.6185272932052612, "rewards/rejected": -0.7442423701286316, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -2.3958241939544678, "logits/rejected": -2.356121063232422, "logps/chosen": -276.14556884765625, "logps/rejected": -239.56112670898438, "loss": 0.5407, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.11243724822998047, "rewards/margins": 0.6681596040725708, "rewards/rejected": -0.7805968523025513, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -2.387842893600464, "logits/rejected": -2.3812038898468018, "logps/chosen": -256.08905029296875, "logps/rejected": -216.9521484375, "loss": 0.5135, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.11949145793914795, "rewards/margins": 0.7178138494491577, "rewards/rejected": -0.8373053669929504, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -2.3569884300231934, "logits/rejected": -2.3548595905303955, "logps/chosen": -268.0955810546875, "logps/rejected": -234.91317749023438, "loss": 0.5402, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.1568536013364792, "rewards/margins": 0.656032145023346, "rewards/rejected": -0.8128856420516968, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -2.394106388092041, "logits/rejected": -2.338951587677002, "logps/chosen": -258.17071533203125, "logps/rejected": -227.0476531982422, "loss": 0.5266, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.18490514159202576, "rewards/margins": 0.674010157585144, "rewards/rejected": -0.8589152097702026, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -2.4031760692596436, "logits/rejected": -2.371420383453369, "logps/chosen": -252.31594848632812, "logps/rejected": -225.9946746826172, "loss": 0.5485, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11656501144170761, "rewards/margins": 0.6458471417427063, "rewards/rejected": -0.7624121308326721, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -2.3602213859558105, "logits/rejected": -2.3286445140838623, "logps/chosen": -269.58294677734375, "logps/rejected": -239.6148681640625, "loss": 0.5509, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.10988609492778778, "rewards/margins": 0.6352638006210327, "rewards/rejected": -0.7451499700546265, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -2.3772807121276855, "logits/rejected": -2.3392374515533447, "logps/chosen": -273.1993713378906, "logps/rejected": -226.94155883789062, "loss": 0.5544, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.15547646582126617, "rewards/margins": 0.6606963872909546, "rewards/rejected": -0.8161728978157043, "step": 1670 }, { "epoch": 1.74, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -2.339207172393799, "logits/rejected": -2.2881035804748535, "logps/chosen": -262.3006896972656, "logps/rejected": -222.82565307617188, "loss": 0.5392, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.1391618549823761, "rewards/margins": 0.6765463948249817, "rewards/rejected": -0.8157082796096802, "step": 1680 }, { "epoch": 1.75, "learning_rate": 2.323000382701875e-07, "logits/chosen": -2.36671781539917, "logits/rejected": -2.30442476272583, "logps/chosen": -262.6791076660156, "logps/rejected": -223.51834106445312, "loss": 0.5352, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.1453666090965271, "rewards/margins": 0.6910194754600525, "rewards/rejected": -0.8363860845565796, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -2.377718448638916, "logits/rejected": -2.3732407093048096, "logps/chosen": -264.3118591308594, "logps/rejected": -235.5894775390625, "loss": 0.5258, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.12053600698709488, "rewards/margins": 0.6903436183929443, "rewards/rejected": -0.8108797073364258, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -2.4043807983398438, "logits/rejected": -2.3661141395568848, "logps/chosen": -273.5931701660156, "logps/rejected": -232.48287963867188, "loss": 0.535, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.140711709856987, "rewards/margins": 0.7188085317611694, "rewards/rejected": -0.8595201373100281, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -2.391242265701294, "logits/rejected": -2.33647084236145, "logps/chosen": -269.0169372558594, "logps/rejected": -230.73583984375, "loss": 0.5443, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.14084765315055847, "rewards/margins": 0.6776271462440491, "rewards/rejected": -0.8184748888015747, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -2.405012845993042, "logits/rejected": -2.3291537761688232, "logps/chosen": -279.62371826171875, "logps/rejected": -237.05722045898438, "loss": 0.554, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.10567928850650787, "rewards/margins": 0.6449233293533325, "rewards/rejected": -0.750602662563324, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -2.3809354305267334, "logits/rejected": -2.341770648956299, "logps/chosen": -272.91741943359375, "logps/rejected": -228.8494873046875, "loss": 0.547, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1115594357252121, "rewards/margins": 0.6728307604789734, "rewards/rejected": -0.7843901515007019, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -2.344855546951294, "logits/rejected": -2.347912549972534, "logps/chosen": -284.1566162109375, "logps/rejected": -242.9143524169922, "loss": 0.559, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.12913444638252258, "rewards/margins": 0.6051042675971985, "rewards/rejected": -0.7342387437820435, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -2.3368725776672363, "logits/rejected": -2.3267197608947754, "logps/chosen": -279.5101623535156, "logps/rejected": -228.0315399169922, "loss": 0.5412, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.123216912150383, "rewards/margins": 0.6950885653495789, "rewards/rejected": -0.818305492401123, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -2.334354877471924, "logits/rejected": -2.3555445671081543, "logps/chosen": -272.8717956542969, "logps/rejected": -230.3594207763672, "loss": 0.5313, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.12218773365020752, "rewards/margins": 0.7177630662918091, "rewards/rejected": -0.8399508595466614, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -2.4097609519958496, "logits/rejected": -2.3510959148406982, "logps/chosen": -276.52862548828125, "logps/rejected": -218.99441528320312, "loss": 0.518, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06975328177213669, "rewards/margins": 0.6969150304794312, "rewards/rejected": -0.7666682600975037, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -2.3507437705993652, "logits/rejected": -2.3511948585510254, "logps/chosen": -271.771240234375, "logps/rejected": -231.90634155273438, "loss": 0.5264, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.12042073160409927, "rewards/margins": 0.7034494876861572, "rewards/rejected": -0.8238701820373535, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -2.4258570671081543, "logits/rejected": -2.4029757976531982, "logps/chosen": -264.3330078125, "logps/rejected": -227.8314208984375, "loss": 0.5476, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.10862596333026886, "rewards/margins": 0.6536161303520203, "rewards/rejected": -0.7622420787811279, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -2.4013142585754395, "logits/rejected": -2.34897518157959, "logps/chosen": -271.6585693359375, "logps/rejected": -241.2907257080078, "loss": 0.5611, "rewards/accuracies": 0.698437511920929, "rewards/chosen": -0.11902491748332977, "rewards/margins": 0.6104603409767151, "rewards/rejected": -0.7294851541519165, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -2.386214256286621, "logits/rejected": -2.33040452003479, "logps/chosen": -245.88143920898438, "logps/rejected": -216.9251251220703, "loss": 0.5401, "rewards/accuracies": 0.71875, "rewards/chosen": -0.14682015776634216, "rewards/margins": 0.6507130861282349, "rewards/rejected": -0.7975332736968994, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -2.4217278957366943, "logits/rejected": -2.3312575817108154, "logps/chosen": -260.94085693359375, "logps/rejected": -222.13607788085938, "loss": 0.5446, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.11108909547328949, "rewards/margins": 0.6555716395378113, "rewards/rejected": -0.7666608095169067, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -2.406583309173584, "logits/rejected": -2.3424503803253174, "logps/chosen": -289.1400146484375, "logps/rejected": -241.73513793945312, "loss": 0.4941, "rewards/accuracies": 0.7671874761581421, "rewards/chosen": -0.09672559797763824, "rewards/margins": 0.7783478498458862, "rewards/rejected": -0.8750733137130737, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -2.405856132507324, "logits/rejected": -2.350475311279297, "logps/chosen": -261.40814208984375, "logps/rejected": -229.8692169189453, "loss": 0.5521, "rewards/accuracies": 0.703125, "rewards/chosen": -0.15710802376270294, "rewards/margins": 0.6592746376991272, "rewards/rejected": -0.8163825869560242, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -2.3927724361419678, "logits/rejected": -2.332962989807129, "logps/chosen": -261.10699462890625, "logps/rejected": -237.5717010498047, "loss": 0.5378, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.1337103396654129, "rewards/margins": 0.6999514102935791, "rewards/rejected": -0.8336617350578308, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -2.4174818992614746, "logits/rejected": -2.361926317214966, "logps/chosen": -275.9540710449219, "logps/rejected": -229.05615234375, "loss": 0.5198, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.1561015248298645, "rewards/margins": 0.7133805155754089, "rewards/rejected": -0.8694820404052734, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -2.373378276824951, "logits/rejected": -2.3580093383789062, "logps/chosen": -263.26739501953125, "logps/rejected": -229.62686157226562, "loss": 0.5396, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.15732435882091522, "rewards/margins": 0.6500160098075867, "rewards/rejected": -0.8073404431343079, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -2.3866982460021973, "logits/rejected": -2.3246593475341797, "logps/chosen": -261.379150390625, "logps/rejected": -227.70016479492188, "loss": 0.5263, "rewards/accuracies": 0.734375, "rewards/chosen": -0.117561474442482, "rewards/margins": 0.7048689723014832, "rewards/rejected": -0.8224304914474487, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -2.4001078605651855, "logits/rejected": -2.3805463314056396, "logps/chosen": -281.5653381347656, "logps/rejected": -243.9423828125, "loss": 0.5362, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.10007290542125702, "rewards/margins": 0.7120274305343628, "rewards/rejected": -0.8121002316474915, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -2.353015899658203, "logits/rejected": -2.3475286960601807, "logps/chosen": -268.6228942871094, "logps/rejected": -238.2252197265625, "loss": 0.5172, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.11127477884292603, "rewards/margins": 0.7399830222129822, "rewards/rejected": -0.8512576818466187, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -2.3818321228027344, "logits/rejected": -2.3469431400299072, "logps/chosen": -265.0734558105469, "logps/rejected": -227.3889617919922, "loss": 0.5273, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.12172959744930267, "rewards/margins": 0.716955304145813, "rewards/rejected": -0.8386849164962769, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -2.401777744293213, "logits/rejected": -2.3709285259246826, "logps/chosen": -268.879638671875, "logps/rejected": -239.0655517578125, "loss": 0.5587, "rewards/accuracies": 0.692187488079071, "rewards/chosen": -0.1592234969139099, "rewards/margins": 0.6302945017814636, "rewards/rejected": -0.7895179986953735, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -2.042747735977173, "eval_logits/rejected": -1.9887516498565674, "eval_logps/chosen": -265.97637939453125, "eval_logps/rejected": -232.0824737548828, "eval_loss": 0.5326370596885681, "eval_rewards/accuracies": 0.7294999957084656, "eval_rewards/chosen": -0.14086098968982697, "eval_rewards/margins": 0.7020561099052429, "eval_rewards/rejected": -0.8429170250892639, "eval_runtime": 1167.6557, "eval_samples_per_second": 1.713, "eval_steps_per_second": 0.428, "step": 1936 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -2.411083698272705, "logits/rejected": -2.3344886302948, "logps/chosen": -260.4184265136719, "logps/rejected": -229.34713745117188, "loss": 0.5528, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.18735817074775696, "rewards/margins": 0.6163454055786133, "rewards/rejected": -0.8037036061286926, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -2.391366481781006, "logits/rejected": -2.3589439392089844, "logps/chosen": -272.74444580078125, "logps/rejected": -228.60281372070312, "loss": 0.5464, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.15350715816020966, "rewards/margins": 0.6585405468940735, "rewards/rejected": -0.8120476603507996, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -2.397200107574463, "logits/rejected": -2.3327198028564453, "logps/chosen": -258.4478759765625, "logps/rejected": -224.2578582763672, "loss": 0.5434, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.20096781849861145, "rewards/margins": 0.657593846321106, "rewards/rejected": -0.8585616946220398, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -2.400557279586792, "logits/rejected": -2.35810923576355, "logps/chosen": -275.8924865722656, "logps/rejected": -239.3294219970703, "loss": 0.5145, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.11401806026697159, "rewards/margins": 0.747878909111023, "rewards/rejected": -0.8618971109390259, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -2.369227647781372, "logits/rejected": -2.3667426109313965, "logps/chosen": -257.553955078125, "logps/rejected": -230.169677734375, "loss": 0.5367, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.15207555890083313, "rewards/margins": 0.6957732439041138, "rewards/rejected": -0.8478488922119141, "step": 1980 }, { "epoch": 2.06, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -2.372884511947632, "logits/rejected": -2.3310484886169434, "logps/chosen": -282.4217224121094, "logps/rejected": -233.046875, "loss": 0.5341, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.12857168912887573, "rewards/margins": 0.752483606338501, "rewards/rejected": -0.8810552358627319, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -2.370082378387451, "logits/rejected": -2.3288538455963135, "logps/chosen": -253.7472686767578, "logps/rejected": -234.3776092529297, "loss": 0.5121, "rewards/accuracies": 0.760937511920929, "rewards/chosen": -0.08212677389383316, "rewards/margins": 0.7719866633415222, "rewards/rejected": -0.8541134595870972, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -2.378678798675537, "logits/rejected": -2.3208470344543457, "logps/chosen": -267.8801574707031, "logps/rejected": -231.2415771484375, "loss": 0.573, "rewards/accuracies": 0.71875, "rewards/chosen": -0.16784097254276276, "rewards/margins": 0.585302472114563, "rewards/rejected": -0.7531434893608093, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -2.3666415214538574, "logits/rejected": -2.316760540008545, "logps/chosen": -260.22723388671875, "logps/rejected": -225.22976684570312, "loss": 0.5166, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.12360270321369171, "rewards/margins": 0.7310017347335815, "rewards/rejected": -0.8546044230461121, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -2.3446133136749268, "logits/rejected": -2.2931389808654785, "logps/chosen": -266.8133239746094, "logps/rejected": -237.9119415283203, "loss": 0.5278, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.11298196017742157, "rewards/margins": 0.7080703973770142, "rewards/rejected": -0.8210523724555969, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -2.408759832382202, "logits/rejected": -2.363680362701416, "logps/chosen": -262.7159118652344, "logps/rejected": -228.66390991210938, "loss": 0.5595, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.14217299222946167, "rewards/margins": 0.6589146852493286, "rewards/rejected": -0.8010876774787903, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -2.408491849899292, "logits/rejected": -2.3210110664367676, "logps/chosen": -246.5405731201172, "logps/rejected": -223.0271453857422, "loss": 0.5248, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.12794676423072815, "rewards/margins": 0.7168751955032349, "rewards/rejected": -0.8448219299316406, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -2.3539464473724365, "logits/rejected": -2.3444278240203857, "logps/chosen": -278.1259765625, "logps/rejected": -243.2495880126953, "loss": 0.5504, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.12467856705188751, "rewards/margins": 0.669019341468811, "rewards/rejected": -0.7936979532241821, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -2.3732540607452393, "logits/rejected": -2.3456478118896484, "logps/chosen": -286.5888366699219, "logps/rejected": -231.79165649414062, "loss": 0.5346, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.09783172607421875, "rewards/margins": 0.705902099609375, "rewards/rejected": -0.8037338256835938, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -2.4102118015289307, "logits/rejected": -2.3785674571990967, "logps/chosen": -252.31881713867188, "logps/rejected": -230.2682342529297, "loss": 0.554, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.15130464732646942, "rewards/margins": 0.668793797492981, "rewards/rejected": -0.8200985193252563, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -2.3902785778045654, "logits/rejected": -2.361997127532959, "logps/chosen": -277.2994384765625, "logps/rejected": -236.0117645263672, "loss": 0.5371, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.10212769359350204, "rewards/margins": 0.7493409514427185, "rewards/rejected": -0.8514686822891235, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -2.371175765991211, "logits/rejected": -2.340148687362671, "logps/chosen": -283.32452392578125, "logps/rejected": -234.09335327148438, "loss": 0.5364, "rewards/accuracies": 0.734375, "rewards/chosen": -0.14591889083385468, "rewards/margins": 0.7080722451210022, "rewards/rejected": -0.8539912104606628, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -2.3838436603546143, "logits/rejected": -2.368041515350342, "logps/chosen": -277.5657653808594, "logps/rejected": -240.91006469726562, "loss": 0.5296, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.11766266822814941, "rewards/margins": 0.6887077689170837, "rewards/rejected": -0.8063703775405884, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -2.4153029918670654, "logits/rejected": -2.3472938537597656, "logps/chosen": -260.43841552734375, "logps/rejected": -222.5975799560547, "loss": 0.5246, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14667481184005737, "rewards/margins": 0.7188171148300171, "rewards/rejected": -0.8654918670654297, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -2.3996524810791016, "logits/rejected": -2.36572003364563, "logps/chosen": -283.7561340332031, "logps/rejected": -229.9889373779297, "loss": 0.5135, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.12039141356945038, "rewards/margins": 0.7521576285362244, "rewards/rejected": -0.8725490570068359, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -2.396955966949463, "logits/rejected": -2.325171709060669, "logps/chosen": -268.83880615234375, "logps/rejected": -232.42672729492188, "loss": 0.4993, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.10067176818847656, "rewards/margins": 0.7941768765449524, "rewards/rejected": -0.894848644733429, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -2.360407590866089, "logits/rejected": -2.3728294372558594, "logps/chosen": -262.50665283203125, "logps/rejected": -244.90261840820312, "loss": 0.5567, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.15976184606552124, "rewards/margins": 0.6574433445930481, "rewards/rejected": -0.8172051310539246, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -2.3352928161621094, "logits/rejected": -2.318737745285034, "logps/chosen": -271.6351623535156, "logps/rejected": -232.324951171875, "loss": 0.5505, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.1446889042854309, "rewards/margins": 0.6412814259529114, "rewards/rejected": -0.7859703302383423, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -2.396017074584961, "logits/rejected": -2.3626606464385986, "logps/chosen": -280.76287841796875, "logps/rejected": -232.1551513671875, "loss": 0.5443, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.1374007910490036, "rewards/margins": 0.6923818588256836, "rewards/rejected": -0.8297826647758484, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -2.3831605911254883, "logits/rejected": -2.367901086807251, "logps/chosen": -274.9002990722656, "logps/rejected": -244.9043426513672, "loss": 0.4998, "rewards/accuracies": 0.7718750238418579, "rewards/chosen": -0.06897449493408203, "rewards/margins": 0.7879935503005981, "rewards/rejected": -0.8569680452346802, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -2.3475286960601807, "logits/rejected": -2.3350141048431396, "logps/chosen": -258.20428466796875, "logps/rejected": -228.5579071044922, "loss": 0.55, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.14758525788784027, "rewards/margins": 0.6847792267799377, "rewards/rejected": -0.8323644399642944, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -2.416398763656616, "logits/rejected": -2.3340847492218018, "logps/chosen": -263.5863952636719, "logps/rejected": -223.93826293945312, "loss": 0.54, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.1121305376291275, "rewards/margins": 0.6978212594985962, "rewards/rejected": -0.8099517822265625, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -2.3754360675811768, "logits/rejected": -2.3295979499816895, "logps/chosen": -261.3006896972656, "logps/rejected": -228.99472045898438, "loss": 0.5379, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.12667515873908997, "rewards/margins": 0.695867657661438, "rewards/rejected": -0.8225427865982056, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -2.373387575149536, "logits/rejected": -2.3520331382751465, "logps/chosen": -273.1501770019531, "logps/rejected": -241.6131591796875, "loss": 0.5105, "rewards/accuracies": 0.770312488079071, "rewards/chosen": -0.08919095993041992, "rewards/margins": 0.7751600742340088, "rewards/rejected": -0.8643510937690735, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -2.4029157161712646, "logits/rejected": -2.3423054218292236, "logps/chosen": -269.0888671875, "logps/rejected": -238.63894653320312, "loss": 0.5064, "rewards/accuracies": 0.734375, "rewards/chosen": -0.11739423125982285, "rewards/margins": 0.8117318153381348, "rewards/rejected": -0.9291261434555054, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -2.3870110511779785, "logits/rejected": -2.3228111267089844, "logps/chosen": -247.74105834960938, "logps/rejected": -220.97531127929688, "loss": 0.5221, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.1162148267030716, "rewards/margins": 0.7085736393928528, "rewards/rejected": -0.8247883915901184, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -2.408937931060791, "logits/rejected": -2.3306527137756348, "logps/chosen": -299.36395263671875, "logps/rejected": -241.8893585205078, "loss": 0.543, "rewards/accuracies": 0.71875, "rewards/chosen": -0.11270508915185928, "rewards/margins": 0.6921781897544861, "rewards/rejected": -0.8048831820487976, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -2.385676383972168, "logits/rejected": -2.3467276096343994, "logps/chosen": -281.59686279296875, "logps/rejected": -241.01278686523438, "loss": 0.5353, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1093897670507431, "rewards/margins": 0.7332038879394531, "rewards/rejected": -0.842593789100647, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -2.373408794403076, "logits/rejected": -2.319791316986084, "logps/chosen": -261.96563720703125, "logps/rejected": -234.7034149169922, "loss": 0.5529, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.11686080694198608, "rewards/margins": 0.6834132671356201, "rewards/rejected": -0.8002740740776062, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -2.4085376262664795, "logits/rejected": -2.3651652336120605, "logps/chosen": -270.48358154296875, "logps/rejected": -242.1610565185547, "loss": 0.5305, "rewards/accuracies": 0.770312488079071, "rewards/chosen": -0.1516662836074829, "rewards/margins": 0.7256360650062561, "rewards/rejected": -0.877302348613739, "step": 2280 }, { "epoch": 2.37, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -2.3058078289031982, "logits/rejected": -2.2898011207580566, "logps/chosen": -253.01205444335938, "logps/rejected": -220.3304901123047, "loss": 0.5347, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.13418254256248474, "rewards/margins": 0.7016364336013794, "rewards/rejected": -0.835818886756897, "step": 2290 }, { "epoch": 2.38, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -2.386352062225342, "logits/rejected": -2.3113696575164795, "logps/chosen": -269.7099304199219, "logps/rejected": -221.75302124023438, "loss": 0.5503, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.17483191192150116, "rewards/margins": 0.7187283635139465, "rewards/rejected": -0.8935602903366089, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -2.3636221885681152, "logits/rejected": -2.342933177947998, "logps/chosen": -258.5984802246094, "logps/rejected": -218.21240234375, "loss": 0.5484, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1345369666814804, "rewards/margins": 0.6814537048339844, "rewards/rejected": -0.8159906268119812, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -2.370859384536743, "logits/rejected": -2.3134427070617676, "logps/chosen": -252.6942596435547, "logps/rejected": -211.6784210205078, "loss": 0.5317, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.1507762372493744, "rewards/margins": 0.69977205991745, "rewards/rejected": -0.8505484461784363, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -2.4157214164733887, "logits/rejected": -2.365856885910034, "logps/chosen": -278.7106628417969, "logps/rejected": -237.4716796875, "loss": 0.538, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.13206318020820618, "rewards/margins": 0.7201939821243286, "rewards/rejected": -0.8522570729255676, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -2.3600049018859863, "logits/rejected": -2.306662082672119, "logps/chosen": -261.60443115234375, "logps/rejected": -243.1952362060547, "loss": 0.5475, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.16204313933849335, "rewards/margins": 0.6782156825065613, "rewards/rejected": -0.8402588963508606, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -2.4186596870422363, "logits/rejected": -2.345165729522705, "logps/chosen": -258.1711730957031, "logps/rejected": -228.2469024658203, "loss": 0.5108, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.13342900574207306, "rewards/margins": 0.770829439163208, "rewards/rejected": -0.9042585492134094, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -2.3762617111206055, "logits/rejected": -2.3277175426483154, "logps/chosen": -266.79815673828125, "logps/rejected": -228.57821655273438, "loss": 0.5197, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.16075488924980164, "rewards/margins": 0.7380831837654114, "rewards/rejected": -0.8988380432128906, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -2.3823940753936768, "logits/rejected": -2.307152271270752, "logps/chosen": -267.8171691894531, "logps/rejected": -229.06973266601562, "loss": 0.5275, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.13006095588207245, "rewards/margins": 0.7317984700202942, "rewards/rejected": -0.8618593215942383, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -2.389812469482422, "logits/rejected": -2.358701229095459, "logps/chosen": -259.939453125, "logps/rejected": -227.9673309326172, "loss": 0.5258, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.14463508129119873, "rewards/margins": 0.6937167644500732, "rewards/rejected": -0.8383519053459167, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -2.3749680519104004, "logits/rejected": -2.325307846069336, "logps/chosen": -261.16265869140625, "logps/rejected": -235.45510864257812, "loss": 0.5166, "rewards/accuracies": 0.75, "rewards/chosen": -0.10085193812847137, "rewards/margins": 0.7839605212211609, "rewards/rejected": -0.8848124742507935, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -2.399411201477051, "logits/rejected": -2.3411877155303955, "logps/chosen": -270.646728515625, "logps/rejected": -242.7877655029297, "loss": 0.5583, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.1165170818567276, "rewards/margins": 0.7015627026557922, "rewards/rejected": -0.8180797696113586, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -2.378415584564209, "logits/rejected": -2.3074827194213867, "logps/chosen": -256.1658630371094, "logps/rejected": -215.31173706054688, "loss": 0.5382, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16168564558029175, "rewards/margins": 0.6914165616035461, "rewards/rejected": -0.8531022071838379, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -2.4177417755126953, "logits/rejected": -2.3251852989196777, "logps/chosen": -267.59588623046875, "logps/rejected": -226.686279296875, "loss": 0.5423, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16102801263332367, "rewards/margins": 0.7026554942131042, "rewards/rejected": -0.8636835813522339, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -2.3578057289123535, "logits/rejected": -2.3223681449890137, "logps/chosen": -263.61029052734375, "logps/rejected": -225.79733276367188, "loss": 0.5397, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.19550970196723938, "rewards/margins": 0.7167800664901733, "rewards/rejected": -0.9122897386550903, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -2.3730854988098145, "logits/rejected": -2.358013153076172, "logps/chosen": -274.9963073730469, "logps/rejected": -241.24533081054688, "loss": 0.5493, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1562972366809845, "rewards/margins": 0.6839101314544678, "rewards/rejected": -0.8402072787284851, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -2.353519916534424, "logits/rejected": -2.3354268074035645, "logps/chosen": -267.2091064453125, "logps/rejected": -218.87997436523438, "loss": 0.5189, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.12245980650186539, "rewards/margins": 0.7322528958320618, "rewards/rejected": -0.8547126650810242, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -2.3733015060424805, "logits/rejected": -2.310149669647217, "logps/chosen": -262.130126953125, "logps/rejected": -235.95108032226562, "loss": 0.5279, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1203254908323288, "rewards/margins": 0.7111250162124634, "rewards/rejected": -0.8314505815505981, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -2.3978214263916016, "logits/rejected": -2.358588218688965, "logps/chosen": -283.1036682128906, "logps/rejected": -232.8982391357422, "loss": 0.5155, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.09982401877641678, "rewards/margins": 0.7901795506477356, "rewards/rejected": -0.8900035619735718, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -2.3774914741516113, "logits/rejected": -2.3199660778045654, "logps/chosen": -270.4402160644531, "logps/rejected": -229.8076934814453, "loss": 0.5217, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.1376962959766388, "rewards/margins": 0.7224219441413879, "rewards/rejected": -0.8601182699203491, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -2.3702144622802734, "logits/rejected": -2.3372480869293213, "logps/chosen": -272.0224609375, "logps/rejected": -220.9506072998047, "loss": 0.4807, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.062098145484924316, "rewards/margins": 0.82757568359375, "rewards/rejected": -0.8896737098693848, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -2.3614370822906494, "logits/rejected": -2.3565754890441895, "logps/chosen": -270.15325927734375, "logps/rejected": -231.0701141357422, "loss": 0.5093, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.09890525788068771, "rewards/margins": 0.8027753829956055, "rewards/rejected": -0.901680588722229, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -2.379781484603882, "logits/rejected": -2.3308448791503906, "logps/chosen": -271.2726135253906, "logps/rejected": -247.5769805908203, "loss": 0.5444, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.13810952007770538, "rewards/margins": 0.7006896138191223, "rewards/rejected": -0.8387991189956665, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -2.4164352416992188, "logits/rejected": -2.363954782485962, "logps/chosen": -271.45989990234375, "logps/rejected": -234.0578155517578, "loss": 0.526, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.17485982179641724, "rewards/margins": 0.7395257949829102, "rewards/rejected": -0.9143856167793274, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -2.429539918899536, "logits/rejected": -2.355285882949829, "logps/chosen": -284.6403503417969, "logps/rejected": -238.6908721923828, "loss": 0.5199, "rewards/accuracies": 0.75, "rewards/chosen": -0.11224106699228287, "rewards/margins": 0.7983044385910034, "rewards/rejected": -0.9105455279350281, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -2.368342876434326, "logits/rejected": -2.3081254959106445, "logps/chosen": -260.9881286621094, "logps/rejected": -239.78683471679688, "loss": 0.5422, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15342268347740173, "rewards/margins": 0.718788743019104, "rewards/rejected": -0.8722113370895386, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -2.3808670043945312, "logits/rejected": -2.32783842086792, "logps/chosen": -272.7002868652344, "logps/rejected": -221.1647186279297, "loss": 0.5194, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.1014653667807579, "rewards/margins": 0.7284099459648132, "rewards/rejected": -0.8298752903938293, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -2.453993320465088, "logits/rejected": -2.3969106674194336, "logps/chosen": -275.2949523925781, "logps/rejected": -238.8881378173828, "loss": 0.5372, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09556527435779572, "rewards/margins": 0.740602433681488, "rewards/rejected": -0.8361676931381226, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -2.3907103538513184, "logits/rejected": -2.350787878036499, "logps/chosen": -250.9322967529297, "logps/rejected": -234.1465606689453, "loss": 0.5312, "rewards/accuracies": 0.71875, "rewards/chosen": -0.15345308184623718, "rewards/margins": 0.7323213815689087, "rewards/rejected": -0.8857744336128235, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -2.387080669403076, "logits/rejected": -2.35870623588562, "logps/chosen": -269.1571350097656, "logps/rejected": -229.3518524169922, "loss": 0.5205, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.12354423105716705, "rewards/margins": 0.7709532380104065, "rewards/rejected": -0.8944975137710571, "step": 2580 }, { "epoch": 2.68, "learning_rate": 6.008419441255262e-08, "logits/chosen": -2.398855209350586, "logits/rejected": -2.381904125213623, "logps/chosen": -263.2884521484375, "logps/rejected": -231.7559051513672, "loss": 0.5259, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.05873938649892807, "rewards/margins": 0.7153197526931763, "rewards/rejected": -0.7740591168403625, "step": 2590 }, { "epoch": 2.69, "learning_rate": 5.817068503635668e-08, "logits/chosen": -2.376080274581909, "logits/rejected": -2.316380739212036, "logps/chosen": -281.10455322265625, "logps/rejected": -218.64511108398438, "loss": 0.5151, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.09104409068822861, "rewards/margins": 0.7734732627868652, "rewards/rejected": -0.864517331123352, "step": 2600 }, { "epoch": 2.7, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -2.380017042160034, "logits/rejected": -2.3436522483825684, "logps/chosen": -273.26165771484375, "logps/rejected": -228.38821411132812, "loss": 0.5224, "rewards/accuracies": 0.75, "rewards/chosen": -0.13183800876140594, "rewards/margins": 0.7871755957603455, "rewards/rejected": -0.9190136194229126, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -2.355607509613037, "logits/rejected": -2.3353257179260254, "logps/chosen": -262.3599548339844, "logps/rejected": -226.27297973632812, "loss": 0.5438, "rewards/accuracies": 0.734375, "rewards/chosen": -0.16399501264095306, "rewards/margins": 0.6985915899276733, "rewards/rejected": -0.8625866174697876, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -2.3956310749053955, "logits/rejected": -2.3475804328918457, "logps/chosen": -264.69793701171875, "logps/rejected": -217.0175323486328, "loss": 0.5229, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.12037453800439835, "rewards/margins": 0.7271707653999329, "rewards/rejected": -0.8475452661514282, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -2.327115297317505, "logits/rejected": -2.3179469108581543, "logps/chosen": -250.42251586914062, "logps/rejected": -225.97705078125, "loss": 0.5338, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1434091329574585, "rewards/margins": 0.6914544701576233, "rewards/rejected": -0.8348636627197266, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -2.4228968620300293, "logits/rejected": -2.358617067337036, "logps/chosen": -266.973388671875, "logps/rejected": -219.0054168701172, "loss": 0.5307, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.08964172005653381, "rewards/margins": 0.7375173568725586, "rewards/rejected": -0.8271591067314148, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -2.3782241344451904, "logits/rejected": -2.3420677185058594, "logps/chosen": -271.78472900390625, "logps/rejected": -224.3458251953125, "loss": 0.4925, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.07608253508806229, "rewards/margins": 0.8127967715263367, "rewards/rejected": -0.8888792991638184, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -2.362567901611328, "logits/rejected": -2.3487753868103027, "logps/chosen": -260.4725646972656, "logps/rejected": -230.2348175048828, "loss": 0.504, "rewards/accuracies": 0.754687488079071, "rewards/chosen": -0.11674080789089203, "rewards/margins": 0.8006342649459839, "rewards/rejected": -0.9173750877380371, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -2.368887186050415, "logits/rejected": -2.3095037937164307, "logps/chosen": -267.6027526855469, "logps/rejected": -227.1664276123047, "loss": 0.5355, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12747621536254883, "rewards/margins": 0.7349743843078613, "rewards/rejected": -0.8624505996704102, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -2.4295105934143066, "logits/rejected": -2.3712687492370605, "logps/chosen": -271.1334228515625, "logps/rejected": -226.98959350585938, "loss": 0.5366, "rewards/accuracies": 0.71875, "rewards/chosen": -0.14556117355823517, "rewards/margins": 0.7037054896354675, "rewards/rejected": -0.8492666482925415, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -2.404041290283203, "logits/rejected": -2.3408515453338623, "logps/chosen": -271.45184326171875, "logps/rejected": -231.26318359375, "loss": 0.5223, "rewards/accuracies": 0.754687488079071, "rewards/chosen": -0.15005668997764587, "rewards/margins": 0.7375911474227905, "rewards/rejected": -0.887647807598114, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -2.4113287925720215, "logits/rejected": -2.363337993621826, "logps/chosen": -279.56695556640625, "logps/rejected": -228.7524871826172, "loss": 0.5678, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": -0.18398186564445496, "rewards/margins": 0.6596510410308838, "rewards/rejected": -0.8436328768730164, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -2.4288249015808105, "logits/rejected": -2.3564791679382324, "logps/chosen": -271.6515808105469, "logps/rejected": -229.5021514892578, "loss": 0.5407, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.18123161792755127, "rewards/margins": 0.7020525932312012, "rewards/rejected": -0.8832842111587524, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -2.3590943813323975, "logits/rejected": -2.322199583053589, "logps/chosen": -273.1612854003906, "logps/rejected": -253.64633178710938, "loss": 0.5437, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.17031243443489075, "rewards/margins": 0.713148295879364, "rewards/rejected": -0.8834608197212219, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -2.3528659343719482, "logits/rejected": -2.3328776359558105, "logps/chosen": -256.59613037109375, "logps/rejected": -226.8491973876953, "loss": 0.5234, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.12790945172309875, "rewards/margins": 0.7292603254318237, "rewards/rejected": -0.8571697473526001, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -2.332599639892578, "logits/rejected": -2.328411340713501, "logps/chosen": -260.6733093261719, "logps/rejected": -226.01119995117188, "loss": 0.5406, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.1711007058620453, "rewards/margins": 0.6720742583274841, "rewards/rejected": -0.8431750535964966, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -2.3848772048950195, "logits/rejected": -2.346205949783325, "logps/chosen": -268.3501281738281, "logps/rejected": -224.84347534179688, "loss": 0.5294, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.13995392620563507, "rewards/margins": 0.7762855887413025, "rewards/rejected": -0.9162395596504211, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -2.38297438621521, "logits/rejected": -2.3261475563049316, "logps/chosen": -265.07781982421875, "logps/rejected": -244.471923828125, "loss": 0.5524, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13383716344833374, "rewards/margins": 0.6434152722358704, "rewards/rejected": -0.7772524952888489, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -2.3448472023010254, "logits/rejected": -2.3202641010284424, "logps/chosen": -266.0987854003906, "logps/rejected": -228.6033172607422, "loss": 0.5201, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1439387947320938, "rewards/margins": 0.7299402952194214, "rewards/rejected": -0.873879075050354, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -2.355379104614258, "logits/rejected": -2.3448832035064697, "logps/chosen": -268.690185546875, "logps/rejected": -234.4865264892578, "loss": 0.5581, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.17621631920337677, "rewards/margins": 0.6596941351890564, "rewards/rejected": -0.835910439491272, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -2.355900287628174, "logits/rejected": -2.32261061668396, "logps/chosen": -264.06536865234375, "logps/rejected": -232.2172088623047, "loss": 0.5227, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.11503295600414276, "rewards/margins": 0.7390708327293396, "rewards/rejected": -0.8541038632392883, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -2.380585193634033, "logits/rejected": -2.325172185897827, "logps/chosen": -271.6562805175781, "logps/rejected": -234.0508575439453, "loss": 0.5377, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.16687723994255066, "rewards/margins": 0.6961434483528137, "rewards/rejected": -0.8630207180976868, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -2.3646774291992188, "logits/rejected": -2.3574256896972656, "logps/chosen": -282.1201171875, "logps/rejected": -234.2088165283203, "loss": 0.5145, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.048953305929899216, "rewards/margins": 0.788312554359436, "rewards/rejected": -0.8372658491134644, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -2.371241569519043, "logits/rejected": -2.355045795440674, "logps/chosen": -280.1076965332031, "logps/rejected": -234.8966522216797, "loss": 0.5564, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.15984012186527252, "rewards/margins": 0.6876562833786011, "rewards/rejected": -0.8474963903427124, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -2.360264778137207, "logits/rejected": -2.332968235015869, "logps/chosen": -278.0101013183594, "logps/rejected": -239.9487762451172, "loss": 0.5575, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.13736246526241302, "rewards/margins": 0.6766383051872253, "rewards/rejected": -0.8140007853507996, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -2.400036334991455, "logits/rejected": -2.3746438026428223, "logps/chosen": -267.7570495605469, "logps/rejected": -229.16140747070312, "loss": 0.5313, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.12042717635631561, "rewards/margins": 0.7909914255142212, "rewards/rejected": -0.9114185571670532, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -2.3523342609405518, "logits/rejected": -2.3188953399658203, "logps/chosen": -260.3684387207031, "logps/rejected": -233.06326293945312, "loss": 0.5271, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.15820932388305664, "rewards/margins": 0.7365429997444153, "rewards/rejected": -0.8947523236274719, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -2.3432793617248535, "logits/rejected": -2.33192777633667, "logps/chosen": -278.02117919921875, "logps/rejected": -233.4646453857422, "loss": 0.5247, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.08122755587100983, "rewards/margins": 0.7956343293190002, "rewards/rejected": -0.8768618702888489, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -2.4073646068573, "logits/rejected": -2.375094175338745, "logps/chosen": -280.04608154296875, "logps/rejected": -233.2005615234375, "loss": 0.5261, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.13482233881950378, "rewards/margins": 0.7431889772415161, "rewards/rejected": -0.8780113458633423, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -2.374481439590454, "logits/rejected": -2.320697784423828, "logps/chosen": -255.5072784423828, "logps/rejected": -207.7611083984375, "loss": 0.5271, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14782702922821045, "rewards/margins": 0.7393444180488586, "rewards/rejected": -0.8871713876724243, "step": 2890 }, { "epoch": 3.0, "learning_rate": 7.654037504783773e-10, "logits/chosen": -2.381277561187744, "logits/rejected": -2.313739061355591, "logps/chosen": -267.82568359375, "logps/rejected": -234.2742156982422, "loss": 0.5194, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10935642570257187, "rewards/margins": 0.7639234662055969, "rewards/rejected": -0.873279869556427, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -2.0344715118408203, "eval_logits/rejected": -1.9804012775421143, "eval_logps/chosen": -265.97662353515625, "eval_logps/rejected": -232.47203063964844, "eval_loss": 0.5272051095962524, "eval_rewards/accuracies": 0.734000027179718, "eval_rewards/chosen": -0.1408846527338028, "eval_rewards/margins": 0.7409887909889221, "eval_rewards/rejected": -0.8818734884262085, "eval_runtime": 1090.2134, "eval_samples_per_second": 1.835, "eval_steps_per_second": 0.459, "step": 2904 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.5639242924154626, "train_runtime": 165279.5111, "train_samples_per_second": 1.125, "train_steps_per_second": 0.018 } ], "logging_steps": 10, "max_steps": 2904, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }