{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 5811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.591065292096219e-10, "logits/chosen": -2.5853981971740723, "logits/rejected": -2.470163345336914, "logps/chosen": -144.5498046875, "logps/rejected": -91.19886779785156, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.59106529209622e-09, "logits/chosen": -2.9029548168182373, "logits/rejected": -2.959444522857666, "logps/chosen": -362.0794982910156, "logps/rejected": -262.45947265625, "loss": 0.6935, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.005129138007760048, "rewards/margins": 0.016535116359591484, "rewards/rejected": -0.011405976489186287, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.7615294456481934, "logits/rejected": -2.725064754486084, "logps/chosen": -269.28619384765625, "logps/rejected": -202.5450897216797, "loss": 0.6933, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.019564008340239525, "rewards/margins": -0.026170048862695694, "rewards/rejected": 0.006606035865843296, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.5773195876288656e-08, "logits/chosen": -2.966618776321411, "logits/rejected": -2.9526820182800293, "logps/chosen": -271.7405090332031, "logps/rejected": -236.72415161132812, "loss": 0.689, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.009709501639008522, "rewards/margins": -0.013466158881783485, "rewards/rejected": 0.0037566572427749634, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.7589166164398193, "logits/rejected": -2.7409415245056152, "logps/chosen": -278.0064697265625, "logps/rejected": -256.8775939941406, "loss": 0.6829, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.011239729821681976, "rewards/margins": 3.3503398299217224e-05, "rewards/rejected": -0.011273231357336044, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.29553264604811e-08, "logits/chosen": -2.9569365978240967, "logits/rejected": -2.904877185821533, "logps/chosen": -293.72796630859375, "logps/rejected": -220.7270050048828, "loss": 0.6639, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.036680832505226135, "rewards/margins": 0.07365237176418304, "rewards/rejected": -0.03697153925895691, "step": 50 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.841106414794922, "logits/rejected": -2.8289294242858887, "logps/chosen": -277.8804016113281, "logps/rejected": -258.37542724609375, "loss": 0.6525, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.057021915912628174, "rewards/margins": 0.10140474885702133, "rewards/rejected": -0.044382836669683456, "step": 60 }, { "epoch": 0.04, "learning_rate": 6.013745704467354e-08, "logits/chosen": -2.944255828857422, "logits/rejected": -2.9709508419036865, "logps/chosen": -341.34271240234375, "logps/rejected": -246.50521850585938, "loss": 0.6418, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0887238085269928, "rewards/margins": 0.16679176688194275, "rewards/rejected": -0.07806795090436935, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.864830493927002, "logits/rejected": -2.8411145210266113, "logps/chosen": -332.96881103515625, "logps/rejected": -261.268798828125, "loss": 0.6062, "rewards/accuracies": 0.625, "rewards/chosen": 0.09977231174707413, "rewards/margins": 0.1671280413866043, "rewards/rejected": -0.06735573709011078, "step": 80 }, { "epoch": 0.05, "learning_rate": 7.731958762886598e-08, "logits/chosen": -2.901690721511841, "logits/rejected": -2.916935682296753, "logps/chosen": -302.84075927734375, "logps/rejected": -185.1248321533203, "loss": 0.6045, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0786793902516365, "rewards/margins": 0.2470569908618927, "rewards/rejected": -0.1683776080608368, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.76128888130188, "logits/rejected": -2.780914068222046, "logps/chosen": -248.6319122314453, "logps/rejected": -200.15841674804688, "loss": 0.5933, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.028152083978056908, "rewards/margins": 0.2552409768104553, "rewards/rejected": -0.22708889842033386, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.450171821305841e-08, "logits/chosen": -2.8498265743255615, "logits/rejected": -2.935537576675415, "logps/chosen": -282.4373779296875, "logps/rejected": -253.15170288085938, "loss": 0.5897, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03426863253116608, "rewards/margins": 0.40692123770713806, "rewards/rejected": -0.3726526200771332, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.805190324783325, "logits/rejected": -2.925807476043701, "logps/chosen": -213.3897247314453, "logps/rejected": -224.468017578125, "loss": 0.5578, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.009538640268146992, "rewards/margins": 0.30600637197494507, "rewards/rejected": -0.3155450224876404, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.1168384879725086e-07, "logits/chosen": -2.9988574981689453, "logits/rejected": -2.885953426361084, "logps/chosen": -334.03802490234375, "logps/rejected": -216.3173370361328, "loss": 0.5695, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.3172129988670349, "rewards/margins": 0.7236066460609436, "rewards/rejected": -0.4063936173915863, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.866483449935913, "logits/rejected": -2.8898892402648926, "logps/chosen": -259.0267639160156, "logps/rejected": -230.06961059570312, "loss": 0.5536, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06076876074075699, "rewards/margins": 0.6484912633895874, "rewards/rejected": -0.7092598676681519, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.2886597938144328e-07, "logits/chosen": -2.9667582511901855, "logits/rejected": -2.937042236328125, "logps/chosen": -295.5273742675781, "logps/rejected": -256.44903564453125, "loss": 0.5298, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.3221905827522278, "rewards/margins": 0.941267192363739, "rewards/rejected": -0.619076669216156, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.8627123832702637, "logits/rejected": -2.849468946456909, "logps/chosen": -308.3032531738281, "logps/rejected": -260.61444091796875, "loss": 0.5207, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.28995996713638306, "rewards/margins": 0.9411754608154297, "rewards/rejected": -0.6512155532836914, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.4604810996563573e-07, "logits/chosen": -2.8260326385498047, "logits/rejected": -2.799665927886963, "logps/chosen": -239.97119140625, "logps/rejected": -209.0448760986328, "loss": 0.4792, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.3469822108745575, "rewards/margins": 1.0210093259811401, "rewards/rejected": -0.6740272045135498, "step": 170 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.8760595321655273, "logits/rejected": -2.821604013442993, "logps/chosen": -206.1426239013672, "logps/rejected": -221.94351196289062, "loss": 0.4943, "rewards/accuracies": 0.75, "rewards/chosen": 0.24601764976978302, "rewards/margins": 0.881085991859436, "rewards/rejected": -0.6350683569908142, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.6323024054982818e-07, "logits/chosen": -2.867799758911133, "logits/rejected": -2.9438815116882324, "logps/chosen": -249.8555145263672, "logps/rejected": -231.462158203125, "loss": 0.5777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.23328867554664612, "rewards/margins": 0.6879739761352539, "rewards/rejected": -0.45468538999557495, "step": 190 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.684551954269409, "logits/rejected": -2.760136604309082, "logps/chosen": -290.9544372558594, "logps/rejected": -195.71920776367188, "loss": 0.4786, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.26035887002944946, "rewards/margins": 0.8763397336006165, "rewards/rejected": -0.615980863571167, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.804123711340206e-07, "logits/chosen": -2.7473480701446533, "logits/rejected": -2.615626811981201, "logps/chosen": -260.44232177734375, "logps/rejected": -212.9258575439453, "loss": 0.5687, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25246724486351013, "rewards/margins": 0.5547892451286316, "rewards/rejected": -0.8072565197944641, "step": 210 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.894285202026367, "logits/rejected": -2.859070301055908, "logps/chosen": -330.01751708984375, "logps/rejected": -225.8118133544922, "loss": 0.5265, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.13710172474384308, "rewards/margins": 0.8895937204360962, "rewards/rejected": -0.7524920701980591, "step": 220 }, { "epoch": 0.12, "learning_rate": 1.9759450171821303e-07, "logits/chosen": -2.868990898132324, "logits/rejected": -2.8889577388763428, "logps/chosen": -262.4832458496094, "logps/rejected": -243.00057983398438, "loss": 0.5065, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04553854465484619, "rewards/margins": 0.5204902291297913, "rewards/rejected": -0.4749516546726227, "step": 230 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.8463659286499023, "logits/rejected": -2.8954005241394043, "logps/chosen": -322.6001892089844, "logps/rejected": -220.10531616210938, "loss": 0.5306, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08190703392028809, "rewards/margins": 0.6268805265426636, "rewards/rejected": -0.7087875604629517, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.1477663230240549e-07, "logits/chosen": -2.980902671813965, "logits/rejected": -2.9693779945373535, "logps/chosen": -273.3463439941406, "logps/rejected": -282.342529296875, "loss": 0.5035, "rewards/accuracies": 0.75, "rewards/chosen": 0.22112369537353516, "rewards/margins": 0.9812121391296387, "rewards/rejected": -0.7600885033607483, "step": 250 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.956479072570801, "logits/rejected": -2.9315035343170166, "logps/chosen": -250.66293334960938, "logps/rejected": -227.837646484375, "loss": 0.54, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.41840487718582153, "rewards/margins": 1.014347791671753, "rewards/rejected": -0.5959428548812866, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.3195876288659794e-07, "logits/chosen": -2.898144483566284, "logits/rejected": -2.8302385807037354, "logps/chosen": -291.5244140625, "logps/rejected": -249.4423065185547, "loss": 0.4798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.2764865756034851, "rewards/margins": 0.9568204879760742, "rewards/rejected": -0.6803339719772339, "step": 270 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.9385907649993896, "logits/rejected": -2.8712496757507324, "logps/chosen": -314.6116027832031, "logps/rejected": -226.3020477294922, "loss": 0.5331, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.25499850511550903, "rewards/margins": 0.9774402379989624, "rewards/rejected": -0.7224417924880981, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.4914089347079036e-07, "logits/chosen": -2.724156141281128, "logits/rejected": -2.797109603881836, "logps/chosen": -264.0115661621094, "logps/rejected": -231.69497680664062, "loss": 0.5239, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.23494203388690948, "rewards/margins": 0.8044212460517883, "rewards/rejected": -0.5694791078567505, "step": 290 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.8613712787628174, "logits/rejected": -2.929839849472046, "logps/chosen": -316.16357421875, "logps/rejected": -230.9220733642578, "loss": 0.5286, "rewards/accuracies": 0.75, "rewards/chosen": 0.43644723296165466, "rewards/margins": 1.0967363119125366, "rewards/rejected": -0.6602891087532043, "step": 300 }, { "epoch": 0.16, "learning_rate": 2.663230240549828e-07, "logits/chosen": -2.952826976776123, "logits/rejected": -2.980451822280884, "logps/chosen": -276.19171142578125, "logps/rejected": -212.08193969726562, "loss": 0.5129, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.25965097546577454, "rewards/margins": 0.9357119798660278, "rewards/rejected": -0.6760609149932861, "step": 310 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.910007953643799, "logits/rejected": -2.9905753135681152, "logps/chosen": -309.59637451171875, "logps/rejected": -234.2860870361328, "loss": 0.4637, "rewards/accuracies": 0.75, "rewards/chosen": 0.0740666389465332, "rewards/margins": 0.9599502682685852, "rewards/rejected": -0.885883629322052, "step": 320 }, { "epoch": 0.17, "learning_rate": 2.835051546391752e-07, "logits/chosen": -2.8667492866516113, "logits/rejected": -2.910284996032715, "logps/chosen": -275.6033935546875, "logps/rejected": -221.75222778320312, "loss": 0.4938, "rewards/accuracies": 0.75, "rewards/chosen": 0.19026069343090057, "rewards/margins": 1.2155379056930542, "rewards/rejected": -1.0252773761749268, "step": 330 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.825681209564209, "logits/rejected": -2.8391976356506348, "logps/chosen": -303.2389221191406, "logps/rejected": -253.29177856445312, "loss": 0.5263, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.33694472908973694, "rewards/margins": 1.066750168800354, "rewards/rejected": -0.7298054695129395, "step": 340 }, { "epoch": 0.18, "learning_rate": 3.006872852233677e-07, "logits/chosen": -2.945141315460205, "logits/rejected": -2.9210100173950195, "logps/chosen": -197.90316772460938, "logps/rejected": -210.59317016601562, "loss": 0.4741, "rewards/accuracies": 0.75, "rewards/chosen": -0.015146581456065178, "rewards/margins": 0.900895893573761, "rewards/rejected": -0.9160425066947937, "step": 350 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.970536708831787, "logits/rejected": -2.9317080974578857, "logps/chosen": -322.3656921386719, "logps/rejected": -263.9642028808594, "loss": 0.4635, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.3962644636631012, "rewards/margins": 1.5553843975067139, "rewards/rejected": -1.159119963645935, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.178694158075601e-07, "logits/chosen": -2.888442277908325, "logits/rejected": -2.83204984664917, "logps/chosen": -233.41043090820312, "logps/rejected": -189.86026000976562, "loss": 0.4999, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.4106314182281494, "rewards/margins": 1.5635592937469482, "rewards/rejected": -1.1529278755187988, "step": 370 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.8554654121398926, "logits/rejected": -2.790348529815674, "logps/chosen": -225.08139038085938, "logps/rejected": -242.3370361328125, "loss": 0.5777, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.026898954063653946, "rewards/margins": 0.547136127948761, "rewards/rejected": -0.5202370882034302, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.3505154639175255e-07, "logits/chosen": -2.8897109031677246, "logits/rejected": -2.8853700160980225, "logps/chosen": -288.1002502441406, "logps/rejected": -220.94772338867188, "loss": 0.4449, "rewards/accuracies": 0.875, "rewards/chosen": 0.2607569098472595, "rewards/margins": 1.4408096075057983, "rewards/rejected": -1.1800527572631836, "step": 390 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -2.944121837615967, "logits/rejected": -2.8930344581604004, "logps/chosen": -233.96829223632812, "logps/rejected": -208.0071258544922, "loss": 0.5594, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15474456548690796, "rewards/margins": 0.6924557685852051, "rewards/rejected": -0.8472002744674683, "step": 400 }, { "epoch": 0.21, "learning_rate": 3.5223367697594503e-07, "logits/chosen": -2.7703442573547363, "logits/rejected": -2.687839984893799, "logps/chosen": -319.0751953125, "logps/rejected": -231.11868286132812, "loss": 0.4436, "rewards/accuracies": 0.75, "rewards/chosen": 0.28324756026268005, "rewards/margins": 1.0761455297470093, "rewards/rejected": -0.7928978204727173, "step": 410 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.8674497604370117, "logits/rejected": -2.8678717613220215, "logps/chosen": -281.7210388183594, "logps/rejected": -257.6501159667969, "loss": 0.5064, "rewards/accuracies": 0.75, "rewards/chosen": -0.058492355048656464, "rewards/margins": 1.1918013095855713, "rewards/rejected": -1.2502937316894531, "step": 420 }, { "epoch": 0.22, "learning_rate": 3.6941580756013745e-07, "logits/chosen": -2.916782855987549, "logits/rejected": -2.892936944961548, "logps/chosen": -205.8938751220703, "logps/rejected": -157.27175903320312, "loss": 0.5008, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06811252981424332, "rewards/margins": 1.2915557622909546, "rewards/rejected": -1.3596681356430054, "step": 430 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.830094814300537, "logits/rejected": -2.850872278213501, "logps/chosen": -303.74920654296875, "logps/rejected": -294.87884521484375, "loss": 0.6483, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.01641862280666828, "rewards/margins": 0.8298453092575073, "rewards/rejected": -0.8462640643119812, "step": 440 }, { "epoch": 0.23, "learning_rate": 3.865979381443299e-07, "logits/chosen": -2.8804867267608643, "logits/rejected": -2.8648791313171387, "logps/chosen": -284.320556640625, "logps/rejected": -258.94122314453125, "loss": 0.5104, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2616979479789734, "rewards/margins": 0.7781568169593811, "rewards/rejected": -1.0398547649383545, "step": 450 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.888380527496338, "logits/rejected": -2.9484517574310303, "logps/chosen": -274.451416015625, "logps/rejected": -266.95623779296875, "loss": 0.5914, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.061025023460388184, "rewards/margins": 1.4974867105484009, "rewards/rejected": -1.558511734008789, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.037800687285223e-07, "logits/chosen": -2.905761480331421, "logits/rejected": -2.9156107902526855, "logps/chosen": -322.3829650878906, "logps/rejected": -198.50205993652344, "loss": 0.4749, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05220061540603638, "rewards/margins": 1.2210773229599, "rewards/rejected": -1.2732778787612915, "step": 470 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.93009614944458, "logits/rejected": -2.9415712356567383, "logps/chosen": -253.5304718017578, "logps/rejected": -240.3785400390625, "loss": 0.5669, "rewards/accuracies": 0.625, "rewards/chosen": 0.2351093739271164, "rewards/margins": 0.7879358530044556, "rewards/rejected": -0.5528265237808228, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.209621993127148e-07, "logits/chosen": -2.872180461883545, "logits/rejected": -2.8901145458221436, "logps/chosen": -302.1181640625, "logps/rejected": -239.98046875, "loss": 0.4494, "rewards/accuracies": 0.75, "rewards/chosen": 0.07415000349283218, "rewards/margins": 1.3110253810882568, "rewards/rejected": -1.236875295639038, "step": 490 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.9795875549316406, "logits/rejected": -2.9768428802490234, "logps/chosen": -296.80718994140625, "logps/rejected": -264.24664306640625, "loss": 0.5631, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.027627941220998764, "rewards/margins": 1.2383382320404053, "rewards/rejected": -1.2107102870941162, "step": 500 }, { "epoch": 0.26, "eval_logits/chosen": -2.957718849182129, "eval_logits/rejected": -2.9466848373413086, "eval_logps/chosen": -298.3453369140625, "eval_logps/rejected": -251.98329162597656, "eval_loss": 0.5260158777236938, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": 0.028815001249313354, "eval_rewards/margins": 1.2370529174804688, "eval_rewards/rejected": -1.208237886428833, "eval_runtime": 218.0881, "eval_samples_per_second": 9.171, "eval_steps_per_second": 0.289, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.381443298969072e-07, "logits/chosen": -2.8311476707458496, "logits/rejected": -2.818819284439087, "logps/chosen": -294.35272216796875, "logps/rejected": -245.2067413330078, "loss": 0.6016, "rewards/accuracies": 0.625, "rewards/chosen": -0.3445579707622528, "rewards/margins": 0.7032971978187561, "rewards/rejected": -1.047855257987976, "step": 510 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.9695799350738525, "logits/rejected": -3.041219711303711, "logps/chosen": -228.91506958007812, "logps/rejected": -206.9320831298828, "loss": 0.5592, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.006498217582702637, "rewards/margins": 1.6344130039215088, "rewards/rejected": -1.6279146671295166, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.5532646048109964e-07, "logits/chosen": -2.9132516384124756, "logits/rejected": -2.9083011150360107, "logps/chosen": -263.5189208984375, "logps/rejected": -193.60177612304688, "loss": 0.5477, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0026242255698889494, "rewards/margins": 1.3856732845306396, "rewards/rejected": -1.3830487728118896, "step": 530 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.9057679176330566, "logits/rejected": -2.905860424041748, "logps/chosen": -238.1947784423828, "logps/rejected": -240.0609130859375, "loss": 0.6173, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4448246955871582, "rewards/margins": 0.9073933362960815, "rewards/rejected": -1.3522180318832397, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.7250859106529206e-07, "logits/chosen": -2.9397683143615723, "logits/rejected": -2.9519639015197754, "logps/chosen": -315.84698486328125, "logps/rejected": -261.22149658203125, "loss": 0.6392, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08306878060102463, "rewards/margins": 1.4375866651535034, "rewards/rejected": -1.5206555128097534, "step": 550 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.993764877319336, "logits/rejected": -2.986448049545288, "logps/chosen": -309.62298583984375, "logps/rejected": -256.41461181640625, "loss": 0.5609, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2558799386024475, "rewards/margins": 1.2214542627334595, "rewards/rejected": -1.4773342609405518, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.896907216494845e-07, "logits/chosen": -2.9518275260925293, "logits/rejected": -2.824982166290283, "logps/chosen": -334.22686767578125, "logps/rejected": -242.75833129882812, "loss": 0.5569, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5716447830200195, "rewards/margins": 0.9872225522994995, "rewards/rejected": -1.5588672161102295, "step": 570 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.9013009071350098, "logits/rejected": -3.0167527198791504, "logps/chosen": -269.56573486328125, "logps/rejected": -236.23318481445312, "loss": 0.6727, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1722039431333542, "rewards/margins": 1.2019895315170288, "rewards/rejected": -1.374193549156189, "step": 580 }, { "epoch": 0.3, "learning_rate": 4.992350353796136e-07, "logits/chosen": -2.9110538959503174, "logits/rejected": -3.0132861137390137, "logps/chosen": -226.79293823242188, "logps/rejected": -207.0228271484375, "loss": 0.5651, "rewards/accuracies": 0.875, "rewards/chosen": 0.3664969801902771, "rewards/margins": 1.9526106119155884, "rewards/rejected": -1.586113691329956, "step": 590 }, { "epoch": 0.31, "learning_rate": 4.982788296041308e-07, "logits/chosen": -2.8610830307006836, "logits/rejected": -2.8405697345733643, "logps/chosen": -256.3024597167969, "logps/rejected": -247.03085327148438, "loss": 0.5457, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07645522803068161, "rewards/margins": 1.704345464706421, "rewards/rejected": -1.6278903484344482, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.973226238286479e-07, "logits/chosen": -2.923488140106201, "logits/rejected": -2.950122117996216, "logps/chosen": -353.5453186035156, "logps/rejected": -298.95477294921875, "loss": 0.5858, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09489689767360687, "rewards/margins": 1.4458928108215332, "rewards/rejected": -1.5407898426055908, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.96366418053165e-07, "logits/chosen": -2.972114086151123, "logits/rejected": -3.030524253845215, "logps/chosen": -311.63153076171875, "logps/rejected": -240.6847381591797, "loss": 0.6625, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.015180367045104504, "rewards/margins": 1.1330465078353882, "rewards/rejected": -1.1482269763946533, "step": 620 }, { "epoch": 0.33, "learning_rate": 4.954102122776821e-07, "logits/chosen": -2.8993327617645264, "logits/rejected": -2.9407591819763184, "logps/chosen": -220.35940551757812, "logps/rejected": -179.3049774169922, "loss": 0.5138, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39531436562538147, "rewards/margins": 1.0542871952056885, "rewards/rejected": -1.4496015310287476, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.944540065021993e-07, "logits/chosen": -2.7829689979553223, "logits/rejected": -2.729430675506592, "logps/chosen": -244.7413330078125, "logps/rejected": -210.92080688476562, "loss": 0.5693, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.12804606556892395, "rewards/margins": 1.9121665954589844, "rewards/rejected": -1.784120798110962, "step": 640 }, { "epoch": 0.34, "learning_rate": 4.934978007267163e-07, "logits/chosen": -2.859757900238037, "logits/rejected": -2.943504571914673, "logps/chosen": -283.7872314453125, "logps/rejected": -251.78982543945312, "loss": 0.5549, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.581717848777771, "rewards/margins": 1.621364951133728, "rewards/rejected": -1.039647102355957, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.925415949512335e-07, "logits/chosen": -2.8729991912841797, "logits/rejected": -2.932633876800537, "logps/chosen": -305.8207702636719, "logps/rejected": -248.5808868408203, "loss": 0.5541, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.07670129835605621, "rewards/margins": 1.1095455884933472, "rewards/rejected": -1.0328443050384521, "step": 660 }, { "epoch": 0.35, "learning_rate": 4.915853891757506e-07, "logits/chosen": -2.7519607543945312, "logits/rejected": -2.755323886871338, "logps/chosen": -168.88475036621094, "logps/rejected": -209.848876953125, "loss": 0.6278, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.14984068274497986, "rewards/margins": 0.7654241323471069, "rewards/rejected": -0.6155833601951599, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.906291834002677e-07, "logits/chosen": -2.7901368141174316, "logits/rejected": -2.767921209335327, "logps/chosen": -291.7945251464844, "logps/rejected": -237.0704803466797, "loss": 0.5762, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1925654113292694, "rewards/margins": 0.9999873042106628, "rewards/rejected": -1.1925528049468994, "step": 680 }, { "epoch": 0.36, "learning_rate": 4.896729776247848e-07, "logits/chosen": -2.902186155319214, "logits/rejected": -2.9201323986053467, "logps/chosen": -265.80633544921875, "logps/rejected": -185.19403076171875, "loss": 0.6003, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33004412055015564, "rewards/margins": 1.19220769405365, "rewards/rejected": -1.522251844406128, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.88716771849302e-07, "logits/chosen": -2.8365702629089355, "logits/rejected": -2.880140781402588, "logps/chosen": -322.38421630859375, "logps/rejected": -299.58990478515625, "loss": 0.5984, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.023646574467420578, "rewards/margins": 1.6279172897338867, "rewards/rejected": -1.6515636444091797, "step": 700 }, { "epoch": 0.37, "learning_rate": 4.87760566073819e-07, "logits/chosen": -2.8931069374084473, "logits/rejected": -2.9269511699676514, "logps/chosen": -271.537841796875, "logps/rejected": -249.77090454101562, "loss": 0.5546, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3147915303707123, "rewards/margins": 1.2212846279144287, "rewards/rejected": -1.5360761880874634, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.868043602983362e-07, "logits/chosen": -2.847012996673584, "logits/rejected": -2.8868696689605713, "logps/chosen": -287.21282958984375, "logps/rejected": -295.8669738769531, "loss": 0.6139, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21573197841644287, "rewards/margins": 2.241217851638794, "rewards/rejected": -2.4569497108459473, "step": 720 }, { "epoch": 0.38, "learning_rate": 4.858481545228533e-07, "logits/chosen": -2.949655771255493, "logits/rejected": -2.920367479324341, "logps/chosen": -289.61383056640625, "logps/rejected": -283.63714599609375, "loss": 0.5586, "rewards/accuracies": 0.75, "rewards/chosen": -0.4029797911643982, "rewards/margins": 1.6155755519866943, "rewards/rejected": -2.0185556411743164, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.848919487473704e-07, "logits/chosen": -2.8386573791503906, "logits/rejected": -2.833543300628662, "logps/chosen": -272.10321044921875, "logps/rejected": -241.79733276367188, "loss": 0.6765, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6702455878257751, "rewards/margins": 0.9414161443710327, "rewards/rejected": -1.6116619110107422, "step": 740 }, { "epoch": 0.39, "learning_rate": 4.839357429718875e-07, "logits/chosen": -2.8777010440826416, "logits/rejected": -2.9025700092315674, "logps/chosen": -286.5823974609375, "logps/rejected": -260.0926208496094, "loss": 0.5844, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14236275851726532, "rewards/margins": 2.0217671394348145, "rewards/rejected": -2.1641299724578857, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.829795371964047e-07, "logits/chosen": -2.8598580360412598, "logits/rejected": -2.91709566116333, "logps/chosen": -309.28656005859375, "logps/rejected": -249.3534698486328, "loss": 0.6013, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.171111062169075, "rewards/margins": 1.5951813459396362, "rewards/rejected": -1.7662923336029053, "step": 760 }, { "epoch": 0.4, "learning_rate": 4.820233314209217e-07, "logits/chosen": -2.7693912982940674, "logits/rejected": -2.7938313484191895, "logps/chosen": -234.9104766845703, "logps/rejected": -214.0842742919922, "loss": 0.6131, "rewards/accuracies": 0.75, "rewards/chosen": -0.1327628642320633, "rewards/margins": 1.7671611309051514, "rewards/rejected": -1.8999239206314087, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.810671256454389e-07, "logits/chosen": -2.7556939125061035, "logits/rejected": -2.7517266273498535, "logps/chosen": -295.79791259765625, "logps/rejected": -242.08694458007812, "loss": 0.5581, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.006750267930328846, "rewards/margins": 2.7443861961364746, "rewards/rejected": -2.751136302947998, "step": 780 }, { "epoch": 0.41, "learning_rate": 4.80110919869956e-07, "logits/chosen": -2.7430715560913086, "logits/rejected": -2.8337535858154297, "logps/chosen": -257.67437744140625, "logps/rejected": -220.5835418701172, "loss": 0.6226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34579893946647644, "rewards/margins": 1.5945725440979004, "rewards/rejected": -1.9403712749481201, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.791547140944731e-07, "logits/chosen": -2.6196367740631104, "logits/rejected": -2.7076234817504883, "logps/chosen": -232.72207641601562, "logps/rejected": -255.9217987060547, "loss": 0.64, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08904242515563965, "rewards/margins": 1.5046972036361694, "rewards/rejected": -1.4156547784805298, "step": 800 }, { "epoch": 0.42, "learning_rate": 4.781985083189902e-07, "logits/chosen": -2.788634777069092, "logits/rejected": -2.8340401649475098, "logps/chosen": -240.91201782226562, "logps/rejected": -233.1719512939453, "loss": 0.6055, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03584769368171692, "rewards/margins": 1.7759116888046265, "rewards/rejected": -1.811759352684021, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.772423025435074e-07, "logits/chosen": -2.765285015106201, "logits/rejected": -2.7674434185028076, "logps/chosen": -253.8185577392578, "logps/rejected": -223.858642578125, "loss": 0.6672, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.6709930300712585, "rewards/margins": 0.025269517675042152, "rewards/rejected": -0.6962625980377197, "step": 820 }, { "epoch": 0.43, "learning_rate": 4.762860967680244e-07, "logits/chosen": -2.8905787467956543, "logits/rejected": -2.8797848224639893, "logps/chosen": -206.35836791992188, "logps/rejected": -200.47171020507812, "loss": 0.6469, "rewards/accuracies": 0.625, "rewards/chosen": -0.09678085148334503, "rewards/margins": 0.6925548315048218, "rewards/rejected": -0.7893357276916504, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.7532989099254154e-07, "logits/chosen": -2.965175151824951, "logits/rejected": -2.9960315227508545, "logps/chosen": -281.5271911621094, "logps/rejected": -253.3331298828125, "loss": 0.6327, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5533876419067383, "rewards/margins": 1.022857666015625, "rewards/rejected": -1.5762451887130737, "step": 840 }, { "epoch": 0.44, "learning_rate": 4.7437368521705866e-07, "logits/chosen": -2.822181224822998, "logits/rejected": -2.9410040378570557, "logps/chosen": -230.7578582763672, "logps/rejected": -242.03726196289062, "loss": 0.6023, "rewards/accuracies": 0.75, "rewards/chosen": 0.11291544139385223, "rewards/margins": 1.7286335229873657, "rewards/rejected": -1.615717887878418, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.7341747944157577e-07, "logits/chosen": -2.80778431892395, "logits/rejected": -2.801071882247925, "logps/chosen": -237.62393188476562, "logps/rejected": -235.6117401123047, "loss": 0.6497, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22725868225097656, "rewards/margins": 1.3354547023773193, "rewards/rejected": -1.5627135038375854, "step": 860 }, { "epoch": 0.45, "learning_rate": 4.724612736660929e-07, "logits/chosen": -2.878175735473633, "logits/rejected": -2.906575918197632, "logps/chosen": -274.377197265625, "logps/rejected": -188.13107299804688, "loss": 0.5401, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.6024242043495178, "rewards/margins": 2.734527587890625, "rewards/rejected": -2.132103204727173, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.7150506789061006e-07, "logits/chosen": -2.790346622467041, "logits/rejected": -2.76853609085083, "logps/chosen": -254.47323608398438, "logps/rejected": -271.0780944824219, "loss": 0.5927, "rewards/accuracies": 0.75, "rewards/chosen": 0.1932486593723297, "rewards/margins": 2.158299446105957, "rewards/rejected": -1.9650509357452393, "step": 880 }, { "epoch": 0.46, "learning_rate": 4.7054886211512717e-07, "logits/chosen": -2.8026018142700195, "logits/rejected": -2.8157997131347656, "logps/chosen": -284.82037353515625, "logps/rejected": -261.57513427734375, "loss": 0.6557, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.060963429510593414, "rewards/margins": 0.8900277018547058, "rewards/rejected": -0.8290642499923706, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.695926563396443e-07, "logits/chosen": -2.9850142002105713, "logits/rejected": -3.0215537548065186, "logps/chosen": -235.89859008789062, "logps/rejected": -222.79476928710938, "loss": 0.595, "rewards/accuracies": 0.75, "rewards/chosen": -0.18551968038082123, "rewards/margins": 1.2160269021987915, "rewards/rejected": -1.4015467166900635, "step": 900 }, { "epoch": 0.47, "learning_rate": 4.686364505641614e-07, "logits/chosen": -2.9402737617492676, "logits/rejected": -2.9274373054504395, "logps/chosen": -294.6170349121094, "logps/rejected": -215.5508575439453, "loss": 0.5671, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08820157498121262, "rewards/margins": 1.4755268096923828, "rewards/rejected": -1.5637282133102417, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.676802447886785e-07, "logits/chosen": -2.8315272331237793, "logits/rejected": -2.892688512802124, "logps/chosen": -266.54571533203125, "logps/rejected": -204.66717529296875, "loss": 0.6862, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.32338160276412964, "rewards/margins": 1.7928798198699951, "rewards/rejected": -1.4694981575012207, "step": 920 }, { "epoch": 0.48, "learning_rate": 4.6672403901319564e-07, "logits/chosen": -2.8591322898864746, "logits/rejected": -2.8798279762268066, "logps/chosen": -284.8063659667969, "logps/rejected": -212.33853149414062, "loss": 0.5101, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.332103967666626, "rewards/margins": 2.160444736480713, "rewards/rejected": -1.8283405303955078, "step": 930 }, { "epoch": 0.49, "learning_rate": 4.6576783323771275e-07, "logits/chosen": -2.767289638519287, "logits/rejected": -2.8271992206573486, "logps/chosen": -251.9027099609375, "logps/rejected": -190.04098510742188, "loss": 0.5335, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32957202196121216, "rewards/margins": 1.1496002674102783, "rewards/rejected": -1.4791723489761353, "step": 940 }, { "epoch": 0.49, "learning_rate": 4.6481162746222987e-07, "logits/chosen": -2.817572832107544, "logits/rejected": -2.8548407554626465, "logps/chosen": -295.7524719238281, "logps/rejected": -238.9110107421875, "loss": 0.5596, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.19818595051765442, "rewards/margins": 2.651278257369995, "rewards/rejected": -2.453092336654663, "step": 950 }, { "epoch": 0.5, "learning_rate": 4.63855421686747e-07, "logits/chosen": -2.7868804931640625, "logits/rejected": -2.8993027210235596, "logps/chosen": -293.16888427734375, "logps/rejected": -254.9773406982422, "loss": 0.5739, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.07755409181118011, "rewards/margins": 1.6788995265960693, "rewards/rejected": -1.601345419883728, "step": 960 }, { "epoch": 0.5, "learning_rate": 4.628992159112641e-07, "logits/chosen": -2.8409125804901123, "logits/rejected": -2.7442398071289062, "logps/chosen": -285.7856140136719, "logps/rejected": -226.0821533203125, "loss": 0.6429, "rewards/accuracies": 0.625, "rewards/chosen": -0.05490158870816231, "rewards/margins": 1.3684966564178467, "rewards/rejected": -1.4233982563018799, "step": 970 }, { "epoch": 0.51, "learning_rate": 4.6194301013578116e-07, "logits/chosen": -2.872086763381958, "logits/rejected": -2.79237699508667, "logps/chosen": -339.43365478515625, "logps/rejected": -262.66766357421875, "loss": 0.5547, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.03792442008852959, "rewards/margins": 2.0819664001464844, "rewards/rejected": -2.0440421104431152, "step": 980 }, { "epoch": 0.51, "learning_rate": 4.609868043602983e-07, "logits/chosen": -2.854628324508667, "logits/rejected": -2.843848705291748, "logps/chosen": -233.026611328125, "logps/rejected": -227.99008178710938, "loss": 0.5319, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.05646134167909622, "rewards/margins": 1.7762739658355713, "rewards/rejected": -1.7198127508163452, "step": 990 }, { "epoch": 0.52, "learning_rate": 4.600305985848154e-07, "logits/chosen": -2.756126880645752, "logits/rejected": -2.9117398262023926, "logps/chosen": -217.8252410888672, "logps/rejected": -191.711669921875, "loss": 0.5432, "rewards/accuracies": 0.625, "rewards/chosen": -0.2646777033805847, "rewards/margins": 0.8738463521003723, "rewards/rejected": -1.1385241746902466, "step": 1000 }, { "epoch": 0.52, "eval_logits/chosen": -2.847585678100586, "eval_logits/rejected": -2.8465394973754883, "eval_logps/chosen": -298.6568298339844, "eval_logps/rejected": -255.18309020996094, "eval_loss": 0.5888190269470215, "eval_rewards/accuracies": 0.7539682388305664, "eval_rewards/chosen": -0.03348641097545624, "eval_rewards/margins": 1.8147083520889282, "eval_rewards/rejected": -1.8481948375701904, "eval_runtime": 217.5601, "eval_samples_per_second": 9.193, "eval_steps_per_second": 0.29, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.590743928093325e-07, "logits/chosen": -2.80271315574646, "logits/rejected": -2.7094879150390625, "logps/chosen": -297.522705078125, "logps/rejected": -240.9547576904297, "loss": 0.6857, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8173992037773132, "rewards/margins": 0.8880969285964966, "rewards/rejected": -1.705496072769165, "step": 1010 }, { "epoch": 0.53, "learning_rate": 4.581181870338497e-07, "logits/chosen": -2.8086001873016357, "logits/rejected": -2.9041824340820312, "logps/chosen": -263.6119384765625, "logps/rejected": -251.357421875, "loss": 0.6192, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.15594568848609924, "rewards/margins": 1.9435104131698608, "rewards/rejected": -1.7875648736953735, "step": 1020 }, { "epoch": 0.53, "learning_rate": 4.571619812583668e-07, "logits/chosen": -2.837925910949707, "logits/rejected": -2.8686470985412598, "logps/chosen": -250.3289337158203, "logps/rejected": -233.561767578125, "loss": 0.5624, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.02483411133289337, "rewards/margins": 2.197080135345459, "rewards/rejected": -2.172245502471924, "step": 1030 }, { "epoch": 0.54, "learning_rate": 4.562057754828839e-07, "logits/chosen": -2.7975571155548096, "logits/rejected": -2.8900492191314697, "logps/chosen": -248.82400512695312, "logps/rejected": -262.0280456542969, "loss": 0.6051, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04311724752187729, "rewards/margins": 2.246140241622925, "rewards/rejected": -2.2030229568481445, "step": 1040 }, { "epoch": 0.54, "learning_rate": 4.55249569707401e-07, "logits/chosen": -2.8506252765655518, "logits/rejected": -2.853006601333618, "logps/chosen": -238.81204223632812, "logps/rejected": -221.2905731201172, "loss": 0.5727, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1139549016952515, "rewards/margins": 1.4150480031967163, "rewards/rejected": -2.5290026664733887, "step": 1050 }, { "epoch": 0.55, "learning_rate": 4.5429336393191814e-07, "logits/chosen": -2.769659996032715, "logits/rejected": -2.8557846546173096, "logps/chosen": -247.4389190673828, "logps/rejected": -191.01991271972656, "loss": 0.6656, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2663023769855499, "rewards/margins": 1.4014960527420044, "rewards/rejected": -1.6677982807159424, "step": 1060 }, { "epoch": 0.55, "learning_rate": 4.5333715815643525e-07, "logits/chosen": -2.843215227127075, "logits/rejected": -2.920480489730835, "logps/chosen": -302.1959533691406, "logps/rejected": -237.12496948242188, "loss": 0.6203, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5315932035446167, "rewards/margins": 1.479473352432251, "rewards/rejected": -2.011066436767578, "step": 1070 }, { "epoch": 0.56, "learning_rate": 4.5238095238095237e-07, "logits/chosen": -2.90852427482605, "logits/rejected": -2.9359192848205566, "logps/chosen": -252.1295928955078, "logps/rejected": -236.31625366210938, "loss": 0.6002, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6265767812728882, "rewards/margins": 1.0830936431884766, "rewards/rejected": -1.7096704244613647, "step": 1080 }, { "epoch": 0.56, "learning_rate": 4.514247466054695e-07, "logits/chosen": -2.874357223510742, "logits/rejected": -2.8359124660491943, "logps/chosen": -220.4912109375, "logps/rejected": -213.4995880126953, "loss": 0.5834, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7575775384902954, "rewards/margins": 1.3033814430236816, "rewards/rejected": -2.0609588623046875, "step": 1090 }, { "epoch": 0.57, "learning_rate": 4.504685408299866e-07, "logits/chosen": -2.8305556774139404, "logits/rejected": -2.8685882091522217, "logps/chosen": -304.5544128417969, "logps/rejected": -278.60675048828125, "loss": 0.6029, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8350940942764282, "rewards/margins": 1.1936551332473755, "rewards/rejected": -2.0287489891052246, "step": 1100 }, { "epoch": 0.57, "learning_rate": 4.495123350545037e-07, "logits/chosen": -2.8294761180877686, "logits/rejected": -2.833220958709717, "logps/chosen": -306.3173828125, "logps/rejected": -255.7045135498047, "loss": 0.5678, "rewards/accuracies": 0.75, "rewards/chosen": -0.4116579592227936, "rewards/margins": 1.4458935260772705, "rewards/rejected": -1.8575513362884521, "step": 1110 }, { "epoch": 0.58, "learning_rate": 4.4855612927902083e-07, "logits/chosen": -2.7491233348846436, "logits/rejected": -2.7054574489593506, "logps/chosen": -285.42779541015625, "logps/rejected": -257.0852355957031, "loss": 1.2474, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2621387541294098, "rewards/margins": 1.8200814723968506, "rewards/rejected": -2.0822200775146484, "step": 1120 }, { "epoch": 0.58, "learning_rate": 4.4759992350353795e-07, "logits/chosen": -2.77081561088562, "logits/rejected": -2.829399824142456, "logps/chosen": -267.37384033203125, "logps/rejected": -256.3465270996094, "loss": 0.5097, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4821946620941162, "rewards/margins": 1.6790558099746704, "rewards/rejected": -2.161250591278076, "step": 1130 }, { "epoch": 0.59, "learning_rate": 4.46643717728055e-07, "logits/chosen": -2.660407304763794, "logits/rejected": -2.7651076316833496, "logps/chosen": -228.09249877929688, "logps/rejected": -246.08883666992188, "loss": 0.5661, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7026903033256531, "rewards/margins": 1.6223033666610718, "rewards/rejected": -2.32499361038208, "step": 1140 }, { "epoch": 0.59, "learning_rate": 4.4568751195257213e-07, "logits/chosen": -2.778298854827881, "logits/rejected": -2.8174757957458496, "logps/chosen": -298.8221435546875, "logps/rejected": -201.99879455566406, "loss": 0.6314, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3881188929080963, "rewards/margins": 1.6859171390533447, "rewards/rejected": -2.074036121368408, "step": 1150 }, { "epoch": 0.6, "learning_rate": 4.447313061770893e-07, "logits/chosen": -2.791391134262085, "logits/rejected": -2.630458354949951, "logps/chosen": -269.7861633300781, "logps/rejected": -264.52484130859375, "loss": 0.6332, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6700159907341003, "rewards/margins": 1.6704028844833374, "rewards/rejected": -2.340418815612793, "step": 1160 }, { "epoch": 0.6, "learning_rate": 4.437751004016064e-07, "logits/chosen": -2.573615789413452, "logits/rejected": -2.726407766342163, "logps/chosen": -216.47705078125, "logps/rejected": -234.5958251953125, "loss": 0.5339, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06952729821205139, "rewards/margins": 1.8820451498031616, "rewards/rejected": -1.9515727758407593, "step": 1170 }, { "epoch": 0.61, "learning_rate": 4.4281889462612353e-07, "logits/chosen": -2.8222270011901855, "logits/rejected": -2.877793312072754, "logps/chosen": -277.1562194824219, "logps/rejected": -207.08447265625, "loss": 0.5704, "rewards/accuracies": 0.75, "rewards/chosen": -0.3084719181060791, "rewards/margins": 1.7400152683258057, "rewards/rejected": -2.0484869480133057, "step": 1180 }, { "epoch": 0.61, "learning_rate": 4.4186268885064064e-07, "logits/chosen": -2.9520726203918457, "logits/rejected": -2.902529239654541, "logps/chosen": -282.25469970703125, "logps/rejected": -226.1280975341797, "loss": 0.6336, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8003433346748352, "rewards/margins": 1.045416235923767, "rewards/rejected": -1.845759630203247, "step": 1190 }, { "epoch": 0.62, "learning_rate": 4.4090648307515776e-07, "logits/chosen": -2.8176891803741455, "logits/rejected": -2.8945231437683105, "logps/chosen": -213.12429809570312, "logps/rejected": -175.6876983642578, "loss": 0.5776, "rewards/accuracies": 0.75, "rewards/chosen": -1.2775752544403076, "rewards/margins": 0.9386798739433289, "rewards/rejected": -2.216254949569702, "step": 1200 }, { "epoch": 0.62, "learning_rate": 4.399502772996749e-07, "logits/chosen": -2.8975610733032227, "logits/rejected": -2.8700690269470215, "logps/chosen": -280.55194091796875, "logps/rejected": -269.6097717285156, "loss": 0.5842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7831735610961914, "rewards/margins": 1.660369873046875, "rewards/rejected": -2.4435436725616455, "step": 1210 }, { "epoch": 0.63, "learning_rate": 4.38994071524192e-07, "logits/chosen": -2.883453130722046, "logits/rejected": -2.962209939956665, "logps/chosen": -285.9277648925781, "logps/rejected": -229.5140838623047, "loss": 0.7004, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7747269868850708, "rewards/margins": 1.7977510690689087, "rewards/rejected": -2.5724778175354004, "step": 1220 }, { "epoch": 0.64, "learning_rate": 4.380378657487091e-07, "logits/chosen": -2.9898650646209717, "logits/rejected": -2.9422051906585693, "logps/chosen": -321.74664306640625, "logps/rejected": -337.42620849609375, "loss": 0.5917, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40765270590782166, "rewards/margins": 1.6362041234970093, "rewards/rejected": -2.0438568592071533, "step": 1230 }, { "epoch": 0.64, "learning_rate": 4.370816599732262e-07, "logits/chosen": -2.9018731117248535, "logits/rejected": -2.9015612602233887, "logps/chosen": -271.43804931640625, "logps/rejected": -233.69143676757812, "loss": 0.6393, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.1562955379486084, "rewards/margins": 1.2955151796340942, "rewards/rejected": -2.451810598373413, "step": 1240 }, { "epoch": 0.65, "learning_rate": 4.3612545419774334e-07, "logits/chosen": -2.7853074073791504, "logits/rejected": -2.750446319580078, "logps/chosen": -241.9004669189453, "logps/rejected": -301.3990783691406, "loss": 0.6279, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9854481816291809, "rewards/margins": 1.3398268222808838, "rewards/rejected": -2.32527494430542, "step": 1250 }, { "epoch": 0.65, "learning_rate": 4.3516924842226045e-07, "logits/chosen": -2.740088939666748, "logits/rejected": -2.8612887859344482, "logps/chosen": -291.9454650878906, "logps/rejected": -230.01675415039062, "loss": 0.6272, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7180348038673401, "rewards/margins": 1.8446018695831299, "rewards/rejected": -2.5626368522644043, "step": 1260 }, { "epoch": 0.66, "learning_rate": 4.3421304264677757e-07, "logits/chosen": -2.8873705863952637, "logits/rejected": -2.757368803024292, "logps/chosen": -262.8088684082031, "logps/rejected": -228.0838623046875, "loss": 0.5759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9115442037582397, "rewards/margins": 1.8955144882202148, "rewards/rejected": -2.807058572769165, "step": 1270 }, { "epoch": 0.66, "learning_rate": 4.332568368712947e-07, "logits/chosen": -2.869631767272949, "logits/rejected": -2.9278321266174316, "logps/chosen": -300.78094482421875, "logps/rejected": -286.11224365234375, "loss": 0.7003, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.4378373622894287, "rewards/margins": 1.4002349376678467, "rewards/rejected": -2.8380722999572754, "step": 1280 }, { "epoch": 0.67, "learning_rate": 4.323006310958118e-07, "logits/chosen": -2.889538288116455, "logits/rejected": -2.952970027923584, "logps/chosen": -342.14373779296875, "logps/rejected": -262.52960205078125, "loss": 0.5762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7595783472061157, "rewards/margins": 1.8629432916641235, "rewards/rejected": -2.6225216388702393, "step": 1290 }, { "epoch": 0.67, "learning_rate": 4.313444253203289e-07, "logits/chosen": -2.895522117614746, "logits/rejected": -2.7695717811584473, "logps/chosen": -251.97232055664062, "logps/rejected": -249.22775268554688, "loss": 0.5322, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9817905426025391, "rewards/margins": 1.8322546482086182, "rewards/rejected": -2.814044952392578, "step": 1300 }, { "epoch": 0.68, "learning_rate": 4.3038821954484603e-07, "logits/chosen": -2.882139205932617, "logits/rejected": -2.8528878688812256, "logps/chosen": -244.2353973388672, "logps/rejected": -250.3789520263672, "loss": 0.5442, "rewards/accuracies": 0.625, "rewards/chosen": -1.030928373336792, "rewards/margins": 1.146545171737671, "rewards/rejected": -2.177473545074463, "step": 1310 }, { "epoch": 0.68, "learning_rate": 4.2943201376936315e-07, "logits/chosen": -2.8502607345581055, "logits/rejected": -2.83538556098938, "logps/chosen": -267.75384521484375, "logps/rejected": -256.6767578125, "loss": 0.6736, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8556143045425415, "rewards/margins": 1.707345962524414, "rewards/rejected": -2.562960386276245, "step": 1320 }, { "epoch": 0.69, "learning_rate": 4.2847580799388026e-07, "logits/chosen": -2.8500895500183105, "logits/rejected": -2.85542368888855, "logps/chosen": -261.53204345703125, "logps/rejected": -243.7827911376953, "loss": 0.6892, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5335882902145386, "rewards/margins": 0.587089478969574, "rewards/rejected": -2.120677947998047, "step": 1330 }, { "epoch": 0.69, "learning_rate": 4.275196022183974e-07, "logits/chosen": -2.8312344551086426, "logits/rejected": -2.8955295085906982, "logps/chosen": -351.7067565917969, "logps/rejected": -237.8406524658203, "loss": 0.6193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3393104076385498, "rewards/margins": 0.9419676065444946, "rewards/rejected": -2.281277656555176, "step": 1340 }, { "epoch": 0.7, "learning_rate": 4.265633964429145e-07, "logits/chosen": -2.8178093433380127, "logits/rejected": -2.8798580169677734, "logps/chosen": -290.2537536621094, "logps/rejected": -217.72128295898438, "loss": 0.7113, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7937359809875488, "rewards/margins": 1.3023329973220825, "rewards/rejected": -2.096068859100342, "step": 1350 }, { "epoch": 0.7, "learning_rate": 4.256071906674316e-07, "logits/chosen": -2.9177136421203613, "logits/rejected": -2.876124143600464, "logps/chosen": -345.4211120605469, "logps/rejected": -294.0853271484375, "loss": 0.7343, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7909644842147827, "rewards/margins": 1.53469717502594, "rewards/rejected": -2.3256616592407227, "step": 1360 }, { "epoch": 0.71, "learning_rate": 4.246509848919487e-07, "logits/chosen": -2.9307923316955566, "logits/rejected": -2.9114553928375244, "logps/chosen": -313.3360595703125, "logps/rejected": -229.73330688476562, "loss": 0.6555, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8673244714736938, "rewards/margins": 1.4944034814834595, "rewards/rejected": -2.3617279529571533, "step": 1370 }, { "epoch": 0.71, "learning_rate": 4.2369477911646584e-07, "logits/chosen": -2.8465042114257812, "logits/rejected": -2.927686929702759, "logps/chosen": -271.71807861328125, "logps/rejected": -244.21066284179688, "loss": 0.6364, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0488981008529663, "rewards/margins": 1.2555683851242065, "rewards/rejected": -2.304466485977173, "step": 1380 }, { "epoch": 0.72, "learning_rate": 4.2273857334098296e-07, "logits/chosen": -2.851271152496338, "logits/rejected": -2.84839129447937, "logps/chosen": -274.51861572265625, "logps/rejected": -216.4557647705078, "loss": 0.651, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1298227310180664, "rewards/margins": 1.343589186668396, "rewards/rejected": -2.473412036895752, "step": 1390 }, { "epoch": 0.72, "learning_rate": 4.2178236756550007e-07, "logits/chosen": -2.8680837154388428, "logits/rejected": -2.8205838203430176, "logps/chosen": -285.1997375488281, "logps/rejected": -279.20831298828125, "loss": 0.7663, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7554957270622253, "rewards/margins": 0.6390464901924133, "rewards/rejected": -1.3945419788360596, "step": 1400 }, { "epoch": 0.73, "learning_rate": 4.208261617900172e-07, "logits/chosen": -2.8597347736358643, "logits/rejected": -2.916684627532959, "logps/chosen": -277.04498291015625, "logps/rejected": -220.5397491455078, "loss": 0.5349, "rewards/accuracies": 0.75, "rewards/chosen": -0.8732470273971558, "rewards/margins": 1.7728040218353271, "rewards/rejected": -2.6460509300231934, "step": 1410 }, { "epoch": 0.73, "learning_rate": 4.198699560145343e-07, "logits/chosen": -2.795240879058838, "logits/rejected": -2.7627010345458984, "logps/chosen": -254.906494140625, "logps/rejected": -226.43496704101562, "loss": 0.6975, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9776231646537781, "rewards/margins": 1.5017640590667725, "rewards/rejected": -2.4793875217437744, "step": 1420 }, { "epoch": 0.74, "learning_rate": 4.189137502390514e-07, "logits/chosen": -2.8324992656707764, "logits/rejected": -2.779787063598633, "logps/chosen": -252.84201049804688, "logps/rejected": -293.3067932128906, "loss": 0.6673, "rewards/accuracies": 0.625, "rewards/chosen": -0.8433632850646973, "rewards/margins": 0.9194127321243286, "rewards/rejected": -1.7627757787704468, "step": 1430 }, { "epoch": 0.74, "learning_rate": 4.179575444635686e-07, "logits/chosen": -2.825773000717163, "logits/rejected": -2.8201498985290527, "logps/chosen": -339.226318359375, "logps/rejected": -279.8710021972656, "loss": 0.7201, "rewards/accuracies": 0.75, "rewards/chosen": -0.3010396361351013, "rewards/margins": 1.9969072341918945, "rewards/rejected": -2.2979469299316406, "step": 1440 }, { "epoch": 0.75, "learning_rate": 4.170013386880857e-07, "logits/chosen": -2.7612528800964355, "logits/rejected": -2.843858242034912, "logps/chosen": -241.0049285888672, "logps/rejected": -258.4814453125, "loss": 0.5739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.837487518787384, "rewards/margins": 1.5697721242904663, "rewards/rejected": -2.407259702682495, "step": 1450 }, { "epoch": 0.75, "learning_rate": 4.1604513291260277e-07, "logits/chosen": -2.8152997493743896, "logits/rejected": -2.84366774559021, "logps/chosen": -245.88607788085938, "logps/rejected": -237.09329223632812, "loss": 0.7015, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6707072257995605, "rewards/margins": 1.8989670276641846, "rewards/rejected": -2.569674253463745, "step": 1460 }, { "epoch": 0.76, "learning_rate": 4.150889271371199e-07, "logits/chosen": -2.8110063076019287, "logits/rejected": -2.838031530380249, "logps/chosen": -261.3493347167969, "logps/rejected": -255.3643341064453, "loss": 0.6881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8193343281745911, "rewards/margins": 1.385074257850647, "rewards/rejected": -2.2044084072113037, "step": 1470 }, { "epoch": 0.76, "learning_rate": 4.14132721361637e-07, "logits/chosen": -2.832944869995117, "logits/rejected": -2.9401321411132812, "logps/chosen": -316.7156677246094, "logps/rejected": -231.45065307617188, "loss": 0.5062, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6513665318489075, "rewards/margins": 2.372331142425537, "rewards/rejected": -3.0236973762512207, "step": 1480 }, { "epoch": 0.77, "learning_rate": 4.131765155861541e-07, "logits/chosen": -2.6967649459838867, "logits/rejected": -2.842420816421509, "logps/chosen": -249.27249145507812, "logps/rejected": -204.11135864257812, "loss": 0.5513, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2104392945766449, "rewards/margins": 2.1391537189483643, "rewards/rejected": -2.349592924118042, "step": 1490 }, { "epoch": 0.77, "learning_rate": 4.1222030981067123e-07, "logits/chosen": -2.775503635406494, "logits/rejected": -2.7504892349243164, "logps/chosen": -320.334228515625, "logps/rejected": -291.64080810546875, "loss": 0.5368, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4635298252105713, "rewards/margins": 0.9672223925590515, "rewards/rejected": -2.4307522773742676, "step": 1500 }, { "epoch": 0.77, "eval_logits/chosen": -2.8444581031799316, "eval_logits/rejected": -2.845541000366211, "eval_logps/chosen": -300.9072570800781, "eval_logps/rejected": -257.592041015625, "eval_loss": 0.5860165953636169, "eval_rewards/accuracies": 0.761904776096344, "eval_rewards/chosen": -0.4835660457611084, "eval_rewards/margins": 1.8464233875274658, "eval_rewards/rejected": -2.329989194869995, "eval_runtime": 217.5823, "eval_samples_per_second": 9.192, "eval_steps_per_second": 0.29, "step": 1500 }, { "epoch": 0.78, "learning_rate": 4.1126410403518835e-07, "logits/chosen": -2.781780242919922, "logits/rejected": -2.6765735149383545, "logps/chosen": -261.59130859375, "logps/rejected": -262.6100158691406, "loss": 0.6188, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7182345986366272, "rewards/margins": 2.0275397300720215, "rewards/rejected": -2.745774269104004, "step": 1510 }, { "epoch": 0.78, "learning_rate": 4.1030789825970546e-07, "logits/chosen": -2.8515138626098633, "logits/rejected": -2.8549370765686035, "logps/chosen": -265.7052307128906, "logps/rejected": -269.73602294921875, "loss": 0.6769, "rewards/accuracies": 0.625, "rewards/chosen": -0.644514262676239, "rewards/margins": 1.1639376878738403, "rewards/rejected": -1.8084518909454346, "step": 1520 }, { "epoch": 0.79, "learning_rate": 4.093516924842226e-07, "logits/chosen": -2.63688325881958, "logits/rejected": -2.6839888095855713, "logps/chosen": -241.33218383789062, "logps/rejected": -242.0855712890625, "loss": 0.55, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38341224193573, "rewards/margins": 1.7259324789047241, "rewards/rejected": -2.109344959259033, "step": 1530 }, { "epoch": 0.8, "learning_rate": 4.083954867087397e-07, "logits/chosen": -2.9009692668914795, "logits/rejected": -2.900012493133545, "logps/chosen": -228.05783081054688, "logps/rejected": -256.7281494140625, "loss": 0.5737, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.7317920923233032, "rewards/margins": 0.674118161201477, "rewards/rejected": -1.4059102535247803, "step": 1540 }, { "epoch": 0.8, "learning_rate": 4.074392809332568e-07, "logits/chosen": -2.7877659797668457, "logits/rejected": -2.843630790710449, "logps/chosen": -314.5323486328125, "logps/rejected": -220.87265014648438, "loss": 0.6052, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5956573486328125, "rewards/margins": 2.074176788330078, "rewards/rejected": -2.6698338985443115, "step": 1550 }, { "epoch": 0.81, "learning_rate": 4.064830751577739e-07, "logits/chosen": -2.8032829761505127, "logits/rejected": -2.897947311401367, "logps/chosen": -218.86270141601562, "logps/rejected": -240.29110717773438, "loss": 0.6209, "rewards/accuracies": 0.875, "rewards/chosen": -0.6553669571876526, "rewards/margins": 1.5234209299087524, "rewards/rejected": -2.17878794670105, "step": 1560 }, { "epoch": 0.81, "learning_rate": 4.0552686938229104e-07, "logits/chosen": -2.8207037448883057, "logits/rejected": -2.805596351623535, "logps/chosen": -247.5675811767578, "logps/rejected": -219.2466278076172, "loss": 0.5239, "rewards/accuracies": 0.875, "rewards/chosen": -0.06444098055362701, "rewards/margins": 2.0179104804992676, "rewards/rejected": -2.0823514461517334, "step": 1570 }, { "epoch": 0.82, "learning_rate": 4.045706636068082e-07, "logits/chosen": -2.8735411167144775, "logits/rejected": -2.7937862873077393, "logps/chosen": -249.93789672851562, "logps/rejected": -250.5798797607422, "loss": 0.5408, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.504967987537384, "rewards/margins": 2.1664693355560303, "rewards/rejected": -2.6714372634887695, "step": 1580 }, { "epoch": 0.82, "learning_rate": 4.036144578313253e-07, "logits/chosen": -2.7197256088256836, "logits/rejected": -2.771433115005493, "logps/chosen": -276.3327331542969, "logps/rejected": -262.82745361328125, "loss": 0.6799, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6452814340591431, "rewards/margins": 1.896369218826294, "rewards/rejected": -2.5416505336761475, "step": 1590 }, { "epoch": 0.83, "learning_rate": 4.0265825205584244e-07, "logits/chosen": -2.7867817878723145, "logits/rejected": -2.8295509815216064, "logps/chosen": -258.8713073730469, "logps/rejected": -244.15274047851562, "loss": 0.6208, "rewards/accuracies": 0.75, "rewards/chosen": -0.5796841979026794, "rewards/margins": 1.6541448831558228, "rewards/rejected": -2.2338290214538574, "step": 1600 }, { "epoch": 0.83, "learning_rate": 4.0170204628035956e-07, "logits/chosen": -2.7563138008117676, "logits/rejected": -2.8291854858398438, "logps/chosen": -204.390380859375, "logps/rejected": -203.91371154785156, "loss": 0.6143, "rewards/accuracies": 0.625, "rewards/chosen": -0.8684107661247253, "rewards/margins": 1.3346433639526367, "rewards/rejected": -2.203054189682007, "step": 1610 }, { "epoch": 0.84, "learning_rate": 4.007458405048766e-07, "logits/chosen": -2.867945432662964, "logits/rejected": -2.9335618019104004, "logps/chosen": -336.86126708984375, "logps/rejected": -275.42034912109375, "loss": 0.5998, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6653950214385986, "rewards/margins": 1.793474793434143, "rewards/rejected": -2.458869695663452, "step": 1620 }, { "epoch": 0.84, "learning_rate": 3.9978963472939373e-07, "logits/chosen": -2.7078988552093506, "logits/rejected": -2.733445644378662, "logps/chosen": -242.2951202392578, "logps/rejected": -237.00595092773438, "loss": 0.605, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5769153833389282, "rewards/margins": 1.0708200931549072, "rewards/rejected": -1.647735357284546, "step": 1630 }, { "epoch": 0.85, "learning_rate": 3.9883342895391085e-07, "logits/chosen": -2.801527738571167, "logits/rejected": -2.7758069038391113, "logps/chosen": -322.3177490234375, "logps/rejected": -236.0830078125, "loss": 0.6412, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8352056741714478, "rewards/margins": 1.7625186443328857, "rewards/rejected": -2.597724199295044, "step": 1640 }, { "epoch": 0.85, "learning_rate": 3.9787722317842796e-07, "logits/chosen": -2.877664089202881, "logits/rejected": -2.956833839416504, "logps/chosen": -298.65057373046875, "logps/rejected": -194.07290649414062, "loss": 0.6583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11642839759588242, "rewards/margins": 2.321591854095459, "rewards/rejected": -2.4380202293395996, "step": 1650 }, { "epoch": 0.86, "learning_rate": 3.969210174029451e-07, "logits/chosen": -2.8761415481567383, "logits/rejected": -2.8178863525390625, "logps/chosen": -224.84701538085938, "logps/rejected": -250.48324584960938, "loss": 0.6195, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8668567538261414, "rewards/margins": 1.9914608001708984, "rewards/rejected": -2.8583176136016846, "step": 1660 }, { "epoch": 0.86, "learning_rate": 3.959648116274622e-07, "logits/chosen": -2.726407527923584, "logits/rejected": -2.817777395248413, "logps/chosen": -228.1207733154297, "logps/rejected": -203.44581604003906, "loss": 0.7566, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3446123600006104, "rewards/margins": 0.8250246047973633, "rewards/rejected": -2.1696372032165527, "step": 1670 }, { "epoch": 0.87, "learning_rate": 3.950086058519793e-07, "logits/chosen": -2.7981009483337402, "logits/rejected": -2.838646650314331, "logps/chosen": -253.6993408203125, "logps/rejected": -226.14205932617188, "loss": 0.7449, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7549091577529907, "rewards/margins": 1.5974199771881104, "rewards/rejected": -2.3523292541503906, "step": 1680 }, { "epoch": 0.87, "learning_rate": 3.9405240007649643e-07, "logits/chosen": -2.8806567192077637, "logits/rejected": -2.896660327911377, "logps/chosen": -249.8539276123047, "logps/rejected": -246.86587524414062, "loss": 0.5332, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.41273418068885803, "rewards/margins": 2.331174612045288, "rewards/rejected": -2.7439088821411133, "step": 1690 }, { "epoch": 0.88, "learning_rate": 3.9309619430101354e-07, "logits/chosen": -2.867082118988037, "logits/rejected": -2.85441255569458, "logps/chosen": -236.4873809814453, "logps/rejected": -197.86598205566406, "loss": 0.651, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5962778925895691, "rewards/margins": 1.6670020818710327, "rewards/rejected": -2.263280153274536, "step": 1700 }, { "epoch": 0.88, "learning_rate": 3.9213998852553066e-07, "logits/chosen": -2.912658214569092, "logits/rejected": -2.878035545349121, "logps/chosen": -301.4076843261719, "logps/rejected": -269.9781494140625, "loss": 0.6361, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7174623012542725, "rewards/margins": 1.713463544845581, "rewards/rejected": -2.4309258460998535, "step": 1710 }, { "epoch": 0.89, "learning_rate": 3.9118378275004783e-07, "logits/chosen": -2.9461216926574707, "logits/rejected": -2.939534902572632, "logps/chosen": -305.78533935546875, "logps/rejected": -254.8824005126953, "loss": 0.6718, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6530225872993469, "rewards/margins": 1.5185779333114624, "rewards/rejected": -2.171600818634033, "step": 1720 }, { "epoch": 0.89, "learning_rate": 3.9022757697456494e-07, "logits/chosen": -2.822605609893799, "logits/rejected": -2.845618963241577, "logps/chosen": -341.7474365234375, "logps/rejected": -285.81787109375, "loss": 0.5988, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.632053017616272, "rewards/margins": 1.855086088180542, "rewards/rejected": -2.4871389865875244, "step": 1730 }, { "epoch": 0.9, "learning_rate": 3.8927137119908206e-07, "logits/chosen": -2.87507963180542, "logits/rejected": -2.9020707607269287, "logps/chosen": -315.70379638671875, "logps/rejected": -205.2082061767578, "loss": 0.5804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7224348783493042, "rewards/margins": 1.805499792098999, "rewards/rejected": -2.527935028076172, "step": 1740 }, { "epoch": 0.9, "learning_rate": 3.883151654235992e-07, "logits/chosen": -2.858518362045288, "logits/rejected": -2.8852877616882324, "logps/chosen": -345.0863952636719, "logps/rejected": -283.49554443359375, "loss": 0.6036, "rewards/accuracies": 0.875, "rewards/chosen": -0.5720993876457214, "rewards/margins": 2.419344902038574, "rewards/rejected": -2.9914443492889404, "step": 1750 }, { "epoch": 0.91, "learning_rate": 3.873589596481163e-07, "logits/chosen": -2.9621615409851074, "logits/rejected": -2.916796922683716, "logps/chosen": -300.87945556640625, "logps/rejected": -227.98318481445312, "loss": 0.6589, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0655115842819214, "rewards/margins": 1.4646036624908447, "rewards/rejected": -2.5301153659820557, "step": 1760 }, { "epoch": 0.91, "learning_rate": 3.864027538726334e-07, "logits/chosen": -2.869260787963867, "logits/rejected": -2.9153895378112793, "logps/chosen": -231.5424346923828, "logps/rejected": -251.66708374023438, "loss": 0.6614, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7593679428100586, "rewards/margins": 2.1713595390319824, "rewards/rejected": -2.930727243423462, "step": 1770 }, { "epoch": 0.92, "learning_rate": 3.8544654809715047e-07, "logits/chosen": -2.89825439453125, "logits/rejected": -2.8985471725463867, "logps/chosen": -336.5444030761719, "logps/rejected": -288.14251708984375, "loss": 0.6138, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.2097070217132568, "rewards/margins": 2.103384017944336, "rewards/rejected": -3.313091278076172, "step": 1780 }, { "epoch": 0.92, "learning_rate": 3.844903423216676e-07, "logits/chosen": -2.8448703289031982, "logits/rejected": -2.8999435901641846, "logps/chosen": -230.13436889648438, "logps/rejected": -220.8369140625, "loss": 0.6235, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8140104413032532, "rewards/margins": 1.2665331363677979, "rewards/rejected": -2.0805437564849854, "step": 1790 }, { "epoch": 0.93, "learning_rate": 3.835341365461847e-07, "logits/chosen": -2.8749191761016846, "logits/rejected": -2.901770830154419, "logps/chosen": -269.7549133300781, "logps/rejected": -208.7238311767578, "loss": 0.593, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1777963638305664, "rewards/margins": 2.0235533714294434, "rewards/rejected": -2.2013497352600098, "step": 1800 }, { "epoch": 0.93, "learning_rate": 3.825779307707018e-07, "logits/chosen": -2.8150510787963867, "logits/rejected": -2.7607598304748535, "logps/chosen": -152.94459533691406, "logps/rejected": -186.07669067382812, "loss": 0.6177, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5417309403419495, "rewards/margins": 1.2329695224761963, "rewards/rejected": -1.7747001647949219, "step": 1810 }, { "epoch": 0.94, "learning_rate": 3.8162172499521893e-07, "logits/chosen": -2.8732874393463135, "logits/rejected": -2.834174871444702, "logps/chosen": -263.18890380859375, "logps/rejected": -225.11953735351562, "loss": 0.6484, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9690064191818237, "rewards/margins": 1.3681566715240479, "rewards/rejected": -2.3371634483337402, "step": 1820 }, { "epoch": 0.94, "learning_rate": 3.8066551921973605e-07, "logits/chosen": -2.811633825302124, "logits/rejected": -2.837749719619751, "logps/chosen": -246.95175170898438, "logps/rejected": -214.7392578125, "loss": 0.5281, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3380643427371979, "rewards/margins": 1.5887610912322998, "rewards/rejected": -1.926825761795044, "step": 1830 }, { "epoch": 0.95, "learning_rate": 3.7970931344425316e-07, "logits/chosen": -2.8069844245910645, "logits/rejected": -2.8743038177490234, "logps/chosen": -272.55523681640625, "logps/rejected": -217.64169311523438, "loss": 0.5512, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6675989031791687, "rewards/margins": 0.8704580068588257, "rewards/rejected": -1.5380569696426392, "step": 1840 }, { "epoch": 0.96, "learning_rate": 3.787531076687703e-07, "logits/chosen": -2.632551670074463, "logits/rejected": -2.784874200820923, "logps/chosen": -280.067138671875, "logps/rejected": -218.7887725830078, "loss": 0.688, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8508871793746948, "rewards/margins": 0.9731461405754089, "rewards/rejected": -1.824033498764038, "step": 1850 }, { "epoch": 0.96, "learning_rate": 3.7779690189328745e-07, "logits/chosen": -2.927870512008667, "logits/rejected": -2.959735870361328, "logps/chosen": -283.48797607421875, "logps/rejected": -238.8175506591797, "loss": 0.5138, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4625054895877838, "rewards/margins": 1.5064427852630615, "rewards/rejected": -1.9689483642578125, "step": 1860 }, { "epoch": 0.97, "learning_rate": 3.7684069611780456e-07, "logits/chosen": -2.826491117477417, "logits/rejected": -2.9152965545654297, "logps/chosen": -269.662841796875, "logps/rejected": -202.84742736816406, "loss": 0.562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6988733410835266, "rewards/margins": 1.1059156656265259, "rewards/rejected": -1.8047891855239868, "step": 1870 }, { "epoch": 0.97, "learning_rate": 3.758844903423217e-07, "logits/chosen": -2.8463423252105713, "logits/rejected": -2.8597524166107178, "logps/chosen": -257.9284973144531, "logps/rejected": -242.43838500976562, "loss": 0.6034, "rewards/accuracies": 0.75, "rewards/chosen": -1.051194429397583, "rewards/margins": 1.7188211679458618, "rewards/rejected": -2.7700157165527344, "step": 1880 }, { "epoch": 0.98, "learning_rate": 3.749282845668388e-07, "logits/chosen": -2.8787481784820557, "logits/rejected": -2.8675646781921387, "logps/chosen": -279.16253662109375, "logps/rejected": -263.882568359375, "loss": 0.5665, "rewards/accuracies": 0.75, "rewards/chosen": -0.7178214192390442, "rewards/margins": 1.551257848739624, "rewards/rejected": -2.2690792083740234, "step": 1890 }, { "epoch": 0.98, "learning_rate": 3.739720787913559e-07, "logits/chosen": -2.8247861862182617, "logits/rejected": -2.97629976272583, "logps/chosen": -266.18304443359375, "logps/rejected": -228.7005615234375, "loss": 0.5644, "rewards/accuracies": 0.75, "rewards/chosen": -0.5098610520362854, "rewards/margins": 1.5873671770095825, "rewards/rejected": -2.0972282886505127, "step": 1900 }, { "epoch": 0.99, "learning_rate": 3.73015873015873e-07, "logits/chosen": -2.952709197998047, "logits/rejected": -2.942049503326416, "logps/chosen": -287.5732421875, "logps/rejected": -249.9008026123047, "loss": 0.6396, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4959273338317871, "rewards/margins": 0.7513211369514465, "rewards/rejected": -1.2472484111785889, "step": 1910 }, { "epoch": 0.99, "learning_rate": 3.7205966724039014e-07, "logits/chosen": -2.792046546936035, "logits/rejected": -2.8531010150909424, "logps/chosen": -281.8520202636719, "logps/rejected": -232.72994995117188, "loss": 0.5827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7002347707748413, "rewards/margins": 1.4923832416534424, "rewards/rejected": -2.1926181316375732, "step": 1920 }, { "epoch": 1.0, "learning_rate": 3.711034614649072e-07, "logits/chosen": -2.8734848499298096, "logits/rejected": -2.806027889251709, "logps/chosen": -275.4158935546875, "logps/rejected": -222.2928009033203, "loss": 0.5198, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6465750932693481, "rewards/margins": 2.2874600887298584, "rewards/rejected": -2.934035301208496, "step": 1930 }, { "epoch": 1.0, "learning_rate": 3.701472556894243e-07, "logits/chosen": -2.883420944213867, "logits/rejected": -2.8368773460388184, "logps/chosen": -223.2138214111328, "logps/rejected": -249.0345916748047, "loss": 0.4761, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.6173292398452759, "rewards/margins": 4.069156169891357, "rewards/rejected": -3.451826810836792, "step": 1940 }, { "epoch": 1.01, "learning_rate": 3.6919104991394144e-07, "logits/chosen": -2.9283018112182617, "logits/rejected": -2.9259495735168457, "logps/chosen": -214.7909393310547, "logps/rejected": -229.945068359375, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.4348597526550293, "rewards/margins": 8.571413040161133, "rewards/rejected": -6.136553764343262, "step": 1950 }, { "epoch": 1.01, "learning_rate": 3.6823484413845855e-07, "logits/chosen": -2.8366754055023193, "logits/rejected": -2.852508306503296, "logps/chosen": -272.7106628417969, "logps/rejected": -307.4251708984375, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": 1.3836749792099, "rewards/margins": 7.853079795837402, "rewards/rejected": -6.469404697418213, "step": 1960 }, { "epoch": 1.02, "learning_rate": 3.6727863836297567e-07, "logits/chosen": -2.834152936935425, "logits/rejected": -2.8273613452911377, "logps/chosen": -244.5872344970703, "logps/rejected": -234.9590301513672, "loss": 0.0905, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.32602596282959, "rewards/margins": 8.823614120483398, "rewards/rejected": -6.497588157653809, "step": 1970 }, { "epoch": 1.02, "learning_rate": 3.663224325874928e-07, "logits/chosen": -2.8812031745910645, "logits/rejected": -2.834937572479248, "logps/chosen": -242.8521728515625, "logps/rejected": -279.22393798828125, "loss": 0.082, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.4316458702087402, "rewards/margins": 6.866148471832275, "rewards/rejected": -5.434502601623535, "step": 1980 }, { "epoch": 1.03, "learning_rate": 3.653662268120099e-07, "logits/chosen": -2.7576725482940674, "logits/rejected": -2.815948963165283, "logps/chosen": -240.4783477783203, "logps/rejected": -309.4148864746094, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": 2.815195322036743, "rewards/margins": 8.677802085876465, "rewards/rejected": -5.862607002258301, "step": 1990 }, { "epoch": 1.03, "learning_rate": 3.6441002103652707e-07, "logits/chosen": -2.766345262527466, "logits/rejected": -2.7719063758850098, "logps/chosen": -239.728515625, "logps/rejected": -268.7380676269531, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 2.324842691421509, "rewards/margins": 8.273411750793457, "rewards/rejected": -5.948569297790527, "step": 2000 }, { "epoch": 1.03, "eval_logits/chosen": -2.8639116287231445, "eval_logits/rejected": -2.8686611652374268, "eval_logps/chosen": -301.4748840332031, "eval_logps/rejected": -259.4017639160156, "eval_loss": 0.6024442315101624, "eval_rewards/accuracies": 0.7777777910232544, "eval_rewards/chosen": -0.5970897078514099, "eval_rewards/margins": 2.0948448181152344, "eval_rewards/rejected": -2.691934823989868, "eval_runtime": 217.645, "eval_samples_per_second": 9.189, "eval_steps_per_second": 0.289, "step": 2000 }, { "epoch": 1.04, "learning_rate": 3.634538152610442e-07, "logits/chosen": -2.8193464279174805, "logits/rejected": -2.942736864089966, "logps/chosen": -268.42852783203125, "logps/rejected": -279.1741027832031, "loss": 0.046, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.551281452178955, "rewards/margins": 9.481260299682617, "rewards/rejected": -5.9299798011779785, "step": 2010 }, { "epoch": 1.04, "learning_rate": 3.624976094855613e-07, "logits/chosen": -2.844984292984009, "logits/rejected": -2.822812557220459, "logps/chosen": -246.75039672851562, "logps/rejected": -248.1599578857422, "loss": 0.0642, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.2839322090148926, "rewards/margins": 9.039632797241211, "rewards/rejected": -6.755700588226318, "step": 2020 }, { "epoch": 1.05, "learning_rate": 3.615414037100784e-07, "logits/chosen": -2.83333683013916, "logits/rejected": -2.878652334213257, "logps/chosen": -288.70623779296875, "logps/rejected": -251.38748168945312, "loss": 0.0691, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2578727006912231, "rewards/margins": 7.318972110748291, "rewards/rejected": -6.061100006103516, "step": 2030 }, { "epoch": 1.05, "learning_rate": 3.6058519793459553e-07, "logits/chosen": -2.7812256813049316, "logits/rejected": -2.7956368923187256, "logps/chosen": -203.9931182861328, "logps/rejected": -231.3068389892578, "loss": 0.0671, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.1192617416381836, "rewards/margins": 8.319083213806152, "rewards/rejected": -6.199821472167969, "step": 2040 }, { "epoch": 1.06, "learning_rate": 3.5962899215911265e-07, "logits/chosen": -2.7568271160125732, "logits/rejected": -2.823901653289795, "logps/chosen": -245.1362762451172, "logps/rejected": -272.96942138671875, "loss": 0.0492, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.207984447479248, "rewards/margins": 8.376309394836426, "rewards/rejected": -6.1683244705200195, "step": 2050 }, { "epoch": 1.06, "learning_rate": 3.5867278638362976e-07, "logits/chosen": -2.7440109252929688, "logits/rejected": -2.844982624053955, "logps/chosen": -292.696044921875, "logps/rejected": -340.18450927734375, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": 2.5816397666931152, "rewards/margins": 9.935778617858887, "rewards/rejected": -7.354138374328613, "step": 2060 }, { "epoch": 1.07, "learning_rate": 3.577165806081469e-07, "logits/chosen": -2.864179849624634, "logits/rejected": -2.906625270843506, "logps/chosen": -217.19314575195312, "logps/rejected": -231.2805938720703, "loss": 0.0719, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5726702213287354, "rewards/margins": 7.601313591003418, "rewards/rejected": -6.0286431312561035, "step": 2070 }, { "epoch": 1.07, "learning_rate": 3.56760374832664e-07, "logits/chosen": -2.84359073638916, "logits/rejected": -2.8455586433410645, "logps/chosen": -296.8313293457031, "logps/rejected": -280.0687255859375, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": 4.307840347290039, "rewards/margins": 12.213151931762695, "rewards/rejected": -7.905311584472656, "step": 2080 }, { "epoch": 1.08, "learning_rate": 3.5580416905718106e-07, "logits/chosen": -2.7166662216186523, "logits/rejected": -2.890003204345703, "logps/chosen": -304.8444519042969, "logps/rejected": -253.0063934326172, "loss": 0.0762, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.852630138397217, "rewards/margins": 9.45458984375, "rewards/rejected": -6.601960182189941, "step": 2090 }, { "epoch": 1.08, "learning_rate": 3.5484796328169817e-07, "logits/chosen": -2.775017261505127, "logits/rejected": -2.9256277084350586, "logps/chosen": -213.930908203125, "logps/rejected": -244.6073760986328, "loss": 0.0553, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.39541494846344, "rewards/margins": 7.692966461181641, "rewards/rejected": -6.297551155090332, "step": 2100 }, { "epoch": 1.09, "learning_rate": 3.538917575062153e-07, "logits/chosen": -2.7707183361053467, "logits/rejected": -2.860555410385132, "logps/chosen": -222.232666015625, "logps/rejected": -277.15673828125, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": 1.953575849533081, "rewards/margins": 9.444633483886719, "rewards/rejected": -7.491057395935059, "step": 2110 }, { "epoch": 1.09, "learning_rate": 3.529355517307324e-07, "logits/chosen": -2.778550624847412, "logits/rejected": -2.742030382156372, "logps/chosen": -227.607421875, "logps/rejected": -284.8799743652344, "loss": 0.0459, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.8729333877563477, "rewards/margins": 9.644989967346191, "rewards/rejected": -7.772056579589844, "step": 2120 }, { "epoch": 1.1, "learning_rate": 3.519793459552495e-07, "logits/chosen": -2.7962563037872314, "logits/rejected": -2.7782444953918457, "logps/chosen": -202.91845703125, "logps/rejected": -298.21551513671875, "loss": 0.0772, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.817417323589325, "rewards/margins": 7.259305000305176, "rewards/rejected": -6.441886901855469, "step": 2130 }, { "epoch": 1.1, "learning_rate": 3.510231401797667e-07, "logits/chosen": -2.763411283493042, "logits/rejected": -2.769122838973999, "logps/chosen": -334.50274658203125, "logps/rejected": -307.247802734375, "loss": 0.0751, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.0035067796707153, "rewards/margins": 10.06065559387207, "rewards/rejected": -9.057147979736328, "step": 2140 }, { "epoch": 1.11, "learning_rate": 3.500669344042838e-07, "logits/chosen": -2.804309368133545, "logits/rejected": -2.7383761405944824, "logps/chosen": -303.35400390625, "logps/rejected": -271.607421875, "loss": 0.0538, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8834270238876343, "rewards/margins": 7.979116916656494, "rewards/rejected": -7.095690727233887, "step": 2150 }, { "epoch": 1.12, "learning_rate": 3.491107286288009e-07, "logits/chosen": -2.8362884521484375, "logits/rejected": -2.798858880996704, "logps/chosen": -219.6747589111328, "logps/rejected": -253.2039794921875, "loss": 0.0676, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.297351360321045, "rewards/margins": 10.74948501586914, "rewards/rejected": -8.452133178710938, "step": 2160 }, { "epoch": 1.12, "learning_rate": 3.4815452285331803e-07, "logits/chosen": -2.8299400806427, "logits/rejected": -2.8571524620056152, "logps/chosen": -313.93414306640625, "logps/rejected": -275.51861572265625, "loss": 0.0669, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.4165822267532349, "rewards/margins": 7.2616400718688965, "rewards/rejected": -5.845057487487793, "step": 2170 }, { "epoch": 1.13, "learning_rate": 3.4719831707783515e-07, "logits/chosen": -2.632450819015503, "logits/rejected": -2.730884552001953, "logps/chosen": -273.5526123046875, "logps/rejected": -258.4808349609375, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": 2.159768581390381, "rewards/margins": 8.972365379333496, "rewards/rejected": -6.812596797943115, "step": 2180 }, { "epoch": 1.13, "learning_rate": 3.4624211130235227e-07, "logits/chosen": -2.8070766925811768, "logits/rejected": -2.8275675773620605, "logps/chosen": -169.80580139160156, "logps/rejected": -181.44944763183594, "loss": 0.0761, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.0720124244689941, "rewards/margins": 8.138480186462402, "rewards/rejected": -7.06646728515625, "step": 2190 }, { "epoch": 1.14, "learning_rate": 3.452859055268694e-07, "logits/chosen": -2.8337655067443848, "logits/rejected": -2.8809292316436768, "logps/chosen": -197.19129943847656, "logps/rejected": -220.0069122314453, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": 0.7570802569389343, "rewards/margins": 8.503564834594727, "rewards/rejected": -7.746484279632568, "step": 2200 }, { "epoch": 1.14, "learning_rate": 3.443296997513865e-07, "logits/chosen": -2.881232500076294, "logits/rejected": -2.835149049758911, "logps/chosen": -278.3822326660156, "logps/rejected": -304.79852294921875, "loss": 0.065, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.2739017009735107, "rewards/margins": 11.037015914916992, "rewards/rejected": -8.763113975524902, "step": 2210 }, { "epoch": 1.15, "learning_rate": 3.433734939759036e-07, "logits/chosen": -2.82194447517395, "logits/rejected": -2.8195812702178955, "logps/chosen": -297.4380798339844, "logps/rejected": -266.0953369140625, "loss": 0.1081, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.1260581016540527, "rewards/margins": 8.836637496948242, "rewards/rejected": -6.710579872131348, "step": 2220 }, { "epoch": 1.15, "learning_rate": 3.4241728820042073e-07, "logits/chosen": -2.7940726280212402, "logits/rejected": -2.7838902473449707, "logps/chosen": -231.32003784179688, "logps/rejected": -283.4253845214844, "loss": 0.0814, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.5174341797828674, "rewards/margins": 7.280348777770996, "rewards/rejected": -6.762915134429932, "step": 2230 }, { "epoch": 1.16, "learning_rate": 3.4146108242493784e-07, "logits/chosen": -2.8232216835021973, "logits/rejected": -2.8321261405944824, "logps/chosen": -223.7116241455078, "logps/rejected": -267.0711669921875, "loss": 0.0835, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.0258145332336426, "rewards/margins": 9.584681510925293, "rewards/rejected": -7.558867454528809, "step": 2240 }, { "epoch": 1.16, "learning_rate": 3.405048766494549e-07, "logits/chosen": -2.687310218811035, "logits/rejected": -2.666550397872925, "logps/chosen": -245.4330596923828, "logps/rejected": -249.4916534423828, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": 1.9064534902572632, "rewards/margins": 8.866552352905273, "rewards/rejected": -6.960099220275879, "step": 2250 }, { "epoch": 1.17, "learning_rate": 3.39548670873972e-07, "logits/chosen": -2.7485148906707764, "logits/rejected": -2.738548755645752, "logps/chosen": -316.63946533203125, "logps/rejected": -399.0462341308594, "loss": 0.1264, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.945350170135498, "rewards/margins": 12.575675010681152, "rewards/rejected": -9.63032341003418, "step": 2260 }, { "epoch": 1.17, "learning_rate": 3.3859246509848914e-07, "logits/chosen": -2.7252113819122314, "logits/rejected": -2.794020652770996, "logps/chosen": -260.09027099609375, "logps/rejected": -282.75201416015625, "loss": 0.074, "rewards/accuracies": 1.0, "rewards/chosen": 1.8539409637451172, "rewards/margins": 9.42973518371582, "rewards/rejected": -7.5757951736450195, "step": 2270 }, { "epoch": 1.18, "learning_rate": 3.376362593230063e-07, "logits/chosen": -2.7548434734344482, "logits/rejected": -2.7015717029571533, "logps/chosen": -215.8569793701172, "logps/rejected": -320.8585205078125, "loss": 0.0624, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.395317792892456, "rewards/margins": 9.333052635192871, "rewards/rejected": -7.937734127044678, "step": 2280 }, { "epoch": 1.18, "learning_rate": 3.366800535475234e-07, "logits/chosen": -2.7667157649993896, "logits/rejected": -2.743626117706299, "logps/chosen": -330.3866271972656, "logps/rejected": -277.27423095703125, "loss": 0.0616, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.2812180519104004, "rewards/margins": 10.883448600769043, "rewards/rejected": -8.602230072021484, "step": 2290 }, { "epoch": 1.19, "learning_rate": 3.3572384777204054e-07, "logits/chosen": -2.783461570739746, "logits/rejected": -2.7846217155456543, "logps/chosen": -313.592529296875, "logps/rejected": -256.70355224609375, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": 2.5473904609680176, "rewards/margins": 9.968744277954102, "rewards/rejected": -7.4213547706604, "step": 2300 }, { "epoch": 1.19, "learning_rate": 3.3476764199655765e-07, "logits/chosen": -2.6445019245147705, "logits/rejected": -2.675950765609741, "logps/chosen": -212.31478881835938, "logps/rejected": -210.2075958251953, "loss": 0.0583, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.8660532236099243, "rewards/margins": 8.678271293640137, "rewards/rejected": -6.81221866607666, "step": 2310 }, { "epoch": 1.2, "learning_rate": 3.3381143622107477e-07, "logits/chosen": -2.7665135860443115, "logits/rejected": -2.7206473350524902, "logps/chosen": -334.072998046875, "logps/rejected": -348.7887878417969, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 2.038207530975342, "rewards/margins": 11.963887214660645, "rewards/rejected": -9.925680160522461, "step": 2320 }, { "epoch": 1.2, "learning_rate": 3.328552304455919e-07, "logits/chosen": -2.7567098140716553, "logits/rejected": -2.7200610637664795, "logps/chosen": -222.26730346679688, "logps/rejected": -237.3346405029297, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 2.507472515106201, "rewards/margins": 10.436367988586426, "rewards/rejected": -7.928895473480225, "step": 2330 }, { "epoch": 1.21, "learning_rate": 3.31899024670109e-07, "logits/chosen": -2.7776715755462646, "logits/rejected": -2.722181558609009, "logps/chosen": -262.3143005371094, "logps/rejected": -255.64309692382812, "loss": 0.0642, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5882365703582764, "rewards/margins": 9.245880126953125, "rewards/rejected": -7.6576433181762695, "step": 2340 }, { "epoch": 1.21, "learning_rate": 3.309428188946261e-07, "logits/chosen": -2.527749538421631, "logits/rejected": -2.5494728088378906, "logps/chosen": -216.6414794921875, "logps/rejected": -218.4356689453125, "loss": 0.0808, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5761357545852661, "rewards/margins": 6.512837886810303, "rewards/rejected": -5.936702728271484, "step": 2350 }, { "epoch": 1.22, "learning_rate": 3.2998661311914323e-07, "logits/chosen": -2.7653374671936035, "logits/rejected": -2.70037579536438, "logps/chosen": -229.9672393798828, "logps/rejected": -267.19720458984375, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 2.17948579788208, "rewards/margins": 12.446728706359863, "rewards/rejected": -10.267244338989258, "step": 2360 }, { "epoch": 1.22, "learning_rate": 3.2903040734366035e-07, "logits/chosen": -2.8731327056884766, "logits/rejected": -2.8409879207611084, "logps/chosen": -270.32586669921875, "logps/rejected": -310.6203918457031, "loss": 0.0533, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5364960432052612, "rewards/margins": 9.386345863342285, "rewards/rejected": -7.849849700927734, "step": 2370 }, { "epoch": 1.23, "learning_rate": 3.2807420156817746e-07, "logits/chosen": -2.7654192447662354, "logits/rejected": -2.765045166015625, "logps/chosen": -230.9626007080078, "logps/rejected": -273.068603515625, "loss": 0.1255, "rewards/accuracies": 1.0, "rewards/chosen": 1.6310447454452515, "rewards/margins": 8.72553825378418, "rewards/rejected": -7.094493865966797, "step": 2380 }, { "epoch": 1.23, "learning_rate": 3.271179957926946e-07, "logits/chosen": -2.6663436889648438, "logits/rejected": -2.797468423843384, "logps/chosen": -272.3541564941406, "logps/rejected": -292.03155517578125, "loss": 0.0919, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.019334077835083, "rewards/margins": 9.053455352783203, "rewards/rejected": -8.034120559692383, "step": 2390 }, { "epoch": 1.24, "learning_rate": 3.261617900172117e-07, "logits/chosen": -2.8381245136260986, "logits/rejected": -2.7584311962127686, "logps/chosen": -225.3228302001953, "logps/rejected": -280.5133056640625, "loss": 0.2103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.5505833625793457, "rewards/margins": 8.765647888183594, "rewards/rejected": -7.21506404876709, "step": 2400 }, { "epoch": 1.24, "learning_rate": 3.2520558424172876e-07, "logits/chosen": -2.884033679962158, "logits/rejected": -2.840308666229248, "logps/chosen": -202.35057067871094, "logps/rejected": -229.30661010742188, "loss": 0.093, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9804633855819702, "rewards/margins": 8.038414001464844, "rewards/rejected": -7.057950019836426, "step": 2410 }, { "epoch": 1.25, "learning_rate": 3.242493784662459e-07, "logits/chosen": -2.8548994064331055, "logits/rejected": -2.8542709350585938, "logps/chosen": -228.2629852294922, "logps/rejected": -269.2410583496094, "loss": 0.0581, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.472010850906372, "rewards/margins": 9.07557487487793, "rewards/rejected": -7.6035637855529785, "step": 2420 }, { "epoch": 1.25, "learning_rate": 3.2329317269076304e-07, "logits/chosen": -2.892120838165283, "logits/rejected": -2.798053741455078, "logps/chosen": -267.05633544921875, "logps/rejected": -296.55108642578125, "loss": 0.0671, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.2721009254455566, "rewards/margins": 10.419027328491211, "rewards/rejected": -8.14692497253418, "step": 2430 }, { "epoch": 1.26, "learning_rate": 3.2233696691528016e-07, "logits/chosen": -2.8099141120910645, "logits/rejected": -2.8631486892700195, "logps/chosen": -252.46932983398438, "logps/rejected": -329.03936767578125, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 3.635554790496826, "rewards/margins": 12.528615951538086, "rewards/rejected": -8.893061637878418, "step": 2440 }, { "epoch": 1.26, "learning_rate": 3.2138076113979727e-07, "logits/chosen": -2.7621617317199707, "logits/rejected": -2.7229347229003906, "logps/chosen": -243.74124145507812, "logps/rejected": -298.3870544433594, "loss": 0.1001, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.0718708038330078, "rewards/margins": 9.034757614135742, "rewards/rejected": -7.962886810302734, "step": 2450 }, { "epoch": 1.27, "learning_rate": 3.204245553643144e-07, "logits/chosen": -2.8180460929870605, "logits/rejected": -2.8537211418151855, "logps/chosen": -277.44879150390625, "logps/rejected": -323.1278991699219, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": 1.6137592792510986, "rewards/margins": 8.935612678527832, "rewards/rejected": -7.321854591369629, "step": 2460 }, { "epoch": 1.28, "learning_rate": 3.194683495888315e-07, "logits/chosen": -2.8711585998535156, "logits/rejected": -2.836691379547119, "logps/chosen": -301.29248046875, "logps/rejected": -234.97311401367188, "loss": 0.0672, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.8811954259872437, "rewards/margins": 9.141138076782227, "rewards/rejected": -7.25994348526001, "step": 2470 }, { "epoch": 1.28, "learning_rate": 3.185121438133486e-07, "logits/chosen": -2.7138116359710693, "logits/rejected": -2.7769246101379395, "logps/chosen": -266.66094970703125, "logps/rejected": -347.24310302734375, "loss": 0.0878, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.6878013610839844, "rewards/margins": 11.373885154724121, "rewards/rejected": -8.68608570098877, "step": 2480 }, { "epoch": 1.29, "learning_rate": 3.1755593803786574e-07, "logits/chosen": -2.799628734588623, "logits/rejected": -2.820836305618286, "logps/chosen": -183.5235595703125, "logps/rejected": -263.58477783203125, "loss": 0.0918, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4970555901527405, "rewards/margins": 7.879460334777832, "rewards/rejected": -7.3824052810668945, "step": 2490 }, { "epoch": 1.29, "learning_rate": 3.1659973226238285e-07, "logits/chosen": -2.701214075088501, "logits/rejected": -2.7091012001037598, "logps/chosen": -210.8098907470703, "logps/rejected": -258.5179748535156, "loss": 0.0817, "rewards/accuracies": 0.875, "rewards/chosen": 1.5346753597259521, "rewards/margins": 8.877952575683594, "rewards/rejected": -7.343277931213379, "step": 2500 }, { "epoch": 1.29, "eval_logits/chosen": -2.825404405593872, "eval_logits/rejected": -2.825698137283325, "eval_logps/chosen": -305.26666259765625, "eval_logps/rejected": -265.15521240234375, "eval_loss": 0.6655394434928894, "eval_rewards/accuracies": 0.773809552192688, "eval_rewards/chosen": -1.3554495573043823, "eval_rewards/margins": 2.4871773719787598, "eval_rewards/rejected": -3.8426268100738525, "eval_runtime": 217.4133, "eval_samples_per_second": 9.199, "eval_steps_per_second": 0.29, "step": 2500 }, { "epoch": 1.3, "learning_rate": 3.1564352648689997e-07, "logits/chosen": -2.887439012527466, "logits/rejected": -2.8315329551696777, "logps/chosen": -249.19839477539062, "logps/rejected": -233.50259399414062, "loss": 0.0951, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.8421556949615479, "rewards/margins": 8.87971305847168, "rewards/rejected": -7.0375566482543945, "step": 2510 }, { "epoch": 1.3, "learning_rate": 3.146873207114171e-07, "logits/chosen": -2.830362319946289, "logits/rejected": -2.8775646686553955, "logps/chosen": -269.3638916015625, "logps/rejected": -237.9186248779297, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 1.7232071161270142, "rewards/margins": 8.593416213989258, "rewards/rejected": -6.870208740234375, "step": 2520 }, { "epoch": 1.31, "learning_rate": 3.137311149359342e-07, "logits/chosen": -2.885897159576416, "logits/rejected": -2.8861446380615234, "logps/chosen": -329.35772705078125, "logps/rejected": -326.6771545410156, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": 1.5491175651550293, "rewards/margins": 8.285898208618164, "rewards/rejected": -6.736781120300293, "step": 2530 }, { "epoch": 1.31, "learning_rate": 3.127749091604513e-07, "logits/chosen": -2.8595659732818604, "logits/rejected": -2.8982152938842773, "logps/chosen": -275.5293884277344, "logps/rejected": -290.2986755371094, "loss": 0.0828, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.8002418279647827, "rewards/margins": 10.767290115356445, "rewards/rejected": -8.967049598693848, "step": 2540 }, { "epoch": 1.32, "learning_rate": 3.1181870338496843e-07, "logits/chosen": -2.8094890117645264, "logits/rejected": -2.795132637023926, "logps/chosen": -194.06175231933594, "logps/rejected": -261.2168884277344, "loss": 0.0996, "rewards/accuracies": 1.0, "rewards/chosen": 0.9291478395462036, "rewards/margins": 8.161163330078125, "rewards/rejected": -7.2320146560668945, "step": 2550 }, { "epoch": 1.32, "learning_rate": 3.108624976094856e-07, "logits/chosen": -2.7061691284179688, "logits/rejected": -2.6929404735565186, "logps/chosen": -278.30389404296875, "logps/rejected": -262.9192199707031, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": 1.9108970165252686, "rewards/margins": 8.77668571472168, "rewards/rejected": -6.865788459777832, "step": 2560 }, { "epoch": 1.33, "learning_rate": 3.0990629183400266e-07, "logits/chosen": -2.6665143966674805, "logits/rejected": -2.6631417274475098, "logps/chosen": -304.42266845703125, "logps/rejected": -281.06915283203125, "loss": 0.0857, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.79115891456604, "rewards/margins": 10.763957977294922, "rewards/rejected": -7.972798824310303, "step": 2570 }, { "epoch": 1.33, "learning_rate": 3.089500860585198e-07, "logits/chosen": -2.7755050659179688, "logits/rejected": -2.7990615367889404, "logps/chosen": -205.9984130859375, "logps/rejected": -233.8455352783203, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": 0.7710639238357544, "rewards/margins": 7.724207401275635, "rewards/rejected": -6.953144073486328, "step": 2580 }, { "epoch": 1.34, "learning_rate": 3.079938802830369e-07, "logits/chosen": -2.770127773284912, "logits/rejected": -2.762481927871704, "logps/chosen": -201.91415405273438, "logps/rejected": -215.83712768554688, "loss": 0.0562, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6502482891082764, "rewards/margins": 7.537081718444824, "rewards/rejected": -6.886833190917969, "step": 2590 }, { "epoch": 1.34, "learning_rate": 3.07037674507554e-07, "logits/chosen": -2.7648098468780518, "logits/rejected": -2.755012035369873, "logps/chosen": -304.14727783203125, "logps/rejected": -344.8402404785156, "loss": 0.1352, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.7347332239151, "rewards/margins": 10.5750150680542, "rewards/rejected": -8.840280532836914, "step": 2600 }, { "epoch": 1.35, "learning_rate": 3.060814687320711e-07, "logits/chosen": -2.6266608238220215, "logits/rejected": -2.6018710136413574, "logps/chosen": -263.6053771972656, "logps/rejected": -259.5660400390625, "loss": 0.0828, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.80942964553833, "rewards/margins": 11.295743942260742, "rewards/rejected": -8.48631477355957, "step": 2610 }, { "epoch": 1.35, "learning_rate": 3.0512526295658824e-07, "logits/chosen": -2.670837879180908, "logits/rejected": -2.6270341873168945, "logps/chosen": -243.7268524169922, "logps/rejected": -280.92059326171875, "loss": 0.1222, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.4703638553619385, "rewards/margins": 11.03471851348877, "rewards/rejected": -8.564353942871094, "step": 2620 }, { "epoch": 1.36, "learning_rate": 3.0416905718110536e-07, "logits/chosen": -2.7124526500701904, "logits/rejected": -2.7607948780059814, "logps/chosen": -230.36959838867188, "logps/rejected": -223.21713256835938, "loss": 0.0758, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.9104559421539307, "rewards/margins": 9.06286907196045, "rewards/rejected": -7.152412414550781, "step": 2630 }, { "epoch": 1.36, "learning_rate": 3.0321285140562247e-07, "logits/chosen": -2.760730743408203, "logits/rejected": -2.729675531387329, "logps/chosen": -260.95703125, "logps/rejected": -284.439453125, "loss": 0.1003, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.830008864402771, "rewards/margins": 7.717930793762207, "rewards/rejected": -6.887921333312988, "step": 2640 }, { "epoch": 1.37, "learning_rate": 3.022566456301396e-07, "logits/chosen": -2.742011308670044, "logits/rejected": -2.7945969104766846, "logps/chosen": -255.4883575439453, "logps/rejected": -264.54864501953125, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": 1.9193328619003296, "rewards/margins": 9.547621726989746, "rewards/rejected": -7.628289222717285, "step": 2650 }, { "epoch": 1.37, "learning_rate": 3.013004398546567e-07, "logits/chosen": -2.665055513381958, "logits/rejected": -2.681821346282959, "logps/chosen": -316.40069580078125, "logps/rejected": -302.1031494140625, "loss": 0.075, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.3142540454864502, "rewards/margins": 8.91224193572998, "rewards/rejected": -7.597988128662109, "step": 2660 }, { "epoch": 1.38, "learning_rate": 3.003442340791738e-07, "logits/chosen": -2.7507271766662598, "logits/rejected": -2.7632174491882324, "logps/chosen": -220.7988739013672, "logps/rejected": -245.73281860351562, "loss": 0.0561, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.9091943502426147, "rewards/margins": 9.140470504760742, "rewards/rejected": -8.231274604797363, "step": 2670 }, { "epoch": 1.38, "learning_rate": 2.9938802830369093e-07, "logits/chosen": -2.761070489883423, "logits/rejected": -2.739316701889038, "logps/chosen": -197.91165161132812, "logps/rejected": -252.8719024658203, "loss": 0.0945, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6190016269683838, "rewards/margins": 8.90224838256836, "rewards/rejected": -8.283246994018555, "step": 2680 }, { "epoch": 1.39, "learning_rate": 2.9843182252820805e-07, "logits/chosen": -2.6564173698425293, "logits/rejected": -2.6826603412628174, "logps/chosen": -261.28460693359375, "logps/rejected": -276.734130859375, "loss": 0.0607, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1821815967559814, "rewards/margins": 10.108437538146973, "rewards/rejected": -8.92625617980957, "step": 2690 }, { "epoch": 1.39, "learning_rate": 2.974756167527252e-07, "logits/chosen": -2.792201519012451, "logits/rejected": -2.789391040802002, "logps/chosen": -249.32687377929688, "logps/rejected": -265.88897705078125, "loss": 0.0659, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5784503817558289, "rewards/margins": 7.7233381271362305, "rewards/rejected": -7.144888401031494, "step": 2700 }, { "epoch": 1.4, "learning_rate": 2.9651941097724233e-07, "logits/chosen": -2.7236266136169434, "logits/rejected": -2.754272937774658, "logps/chosen": -274.2682189941406, "logps/rejected": -247.7708282470703, "loss": 0.1082, "rewards/accuracies": 1.0, "rewards/chosen": 1.2451229095458984, "rewards/margins": 8.562277793884277, "rewards/rejected": -7.317155361175537, "step": 2710 }, { "epoch": 1.4, "learning_rate": 2.9556320520175945e-07, "logits/chosen": -2.66971755027771, "logits/rejected": -2.7084593772888184, "logps/chosen": -264.0753479003906, "logps/rejected": -289.4743957519531, "loss": 0.1105, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.5378565788269043, "rewards/margins": 7.262510776519775, "rewards/rejected": -7.8003668785095215, "step": 2720 }, { "epoch": 1.41, "learning_rate": 2.946069994262765e-07, "logits/chosen": -2.741020679473877, "logits/rejected": -2.6548125743865967, "logps/chosen": -313.64556884765625, "logps/rejected": -337.761474609375, "loss": 0.0706, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.526160717010498, "rewards/margins": 10.978344917297363, "rewards/rejected": -8.452184677124023, "step": 2730 }, { "epoch": 1.41, "learning_rate": 2.9365079365079363e-07, "logits/chosen": -2.7509303092956543, "logits/rejected": -2.701751947402954, "logps/chosen": -317.73651123046875, "logps/rejected": -244.7392120361328, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": 3.711538791656494, "rewards/margins": 11.599513053894043, "rewards/rejected": -7.887974739074707, "step": 2740 }, { "epoch": 1.42, "learning_rate": 2.9269458787531074e-07, "logits/chosen": -2.766720771789551, "logits/rejected": -2.761768341064453, "logps/chosen": -292.88800048828125, "logps/rejected": -321.57855224609375, "loss": 0.1135, "rewards/accuracies": 0.875, "rewards/chosen": 1.4988124370574951, "rewards/margins": 9.010907173156738, "rewards/rejected": -7.5120954513549805, "step": 2750 }, { "epoch": 1.42, "learning_rate": 2.9173838209982786e-07, "logits/chosen": -2.776931047439575, "logits/rejected": -2.808295965194702, "logps/chosen": -234.4710693359375, "logps/rejected": -209.95736694335938, "loss": 0.0965, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3970197439193726, "rewards/margins": 8.639843940734863, "rewards/rejected": -7.242823123931885, "step": 2760 }, { "epoch": 1.43, "learning_rate": 2.90782176324345e-07, "logits/chosen": -2.8999898433685303, "logits/rejected": -2.8510196208953857, "logps/chosen": -288.8046875, "logps/rejected": -264.20550537109375, "loss": 0.0871, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.1829469203948975, "rewards/margins": 9.268820762634277, "rewards/rejected": -8.0858736038208, "step": 2770 }, { "epoch": 1.44, "learning_rate": 2.898259705488621e-07, "logits/chosen": -2.8797316551208496, "logits/rejected": -2.8782153129577637, "logps/chosen": -281.7368469238281, "logps/rejected": -233.17752075195312, "loss": 0.0731, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6002839803695679, "rewards/margins": 8.053850173950195, "rewards/rejected": -7.4535651206970215, "step": 2780 }, { "epoch": 1.44, "learning_rate": 2.888697647733792e-07, "logits/chosen": -2.771524429321289, "logits/rejected": -2.766451358795166, "logps/chosen": -343.028076171875, "logps/rejected": -322.9091491699219, "loss": 0.0661, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.140158176422119, "rewards/margins": 11.231983184814453, "rewards/rejected": -9.091826438903809, "step": 2790 }, { "epoch": 1.45, "learning_rate": 2.879135589978963e-07, "logits/chosen": -2.820176362991333, "logits/rejected": -2.7664897441864014, "logps/chosen": -280.60577392578125, "logps/rejected": -251.74691772460938, "loss": 0.0922, "rewards/accuracies": 1.0, "rewards/chosen": 1.6660699844360352, "rewards/margins": 8.87816047668457, "rewards/rejected": -7.212090969085693, "step": 2800 }, { "epoch": 1.45, "learning_rate": 2.8695735322241344e-07, "logits/chosen": -2.7457170486450195, "logits/rejected": -2.747769832611084, "logps/chosen": -291.2057800292969, "logps/rejected": -268.2013244628906, "loss": 0.1417, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.9127013683319092, "rewards/margins": 10.081315994262695, "rewards/rejected": -8.168614387512207, "step": 2810 }, { "epoch": 1.46, "learning_rate": 2.8600114744693055e-07, "logits/chosen": -2.723422050476074, "logits/rejected": -2.807495594024658, "logps/chosen": -289.26080322265625, "logps/rejected": -301.3398742675781, "loss": 0.0752, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.1681814193725586, "rewards/margins": 11.451757431030273, "rewards/rejected": -9.283575057983398, "step": 2820 }, { "epoch": 1.46, "learning_rate": 2.8504494167144767e-07, "logits/chosen": -2.7436156272888184, "logits/rejected": -2.7432618141174316, "logps/chosen": -261.23052978515625, "logps/rejected": -317.21588134765625, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": 1.8200260400772095, "rewards/margins": 11.07886028289795, "rewards/rejected": -9.258834838867188, "step": 2830 }, { "epoch": 1.47, "learning_rate": 2.8408873589596484e-07, "logits/chosen": -2.780362367630005, "logits/rejected": -2.7789127826690674, "logps/chosen": -248.43612670898438, "logps/rejected": -268.502685546875, "loss": 0.0611, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.7744905948638916, "rewards/margins": 9.7242431640625, "rewards/rejected": -7.9497528076171875, "step": 2840 }, { "epoch": 1.47, "learning_rate": 2.8313253012048195e-07, "logits/chosen": -2.705899477005005, "logits/rejected": -2.772587537765503, "logps/chosen": -216.2031707763672, "logps/rejected": -305.090087890625, "loss": 0.1077, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.7985403537750244, "rewards/margins": 10.163469314575195, "rewards/rejected": -8.364928245544434, "step": 2850 }, { "epoch": 1.48, "learning_rate": 2.8217632434499907e-07, "logits/chosen": -2.7169694900512695, "logits/rejected": -2.6625046730041504, "logps/chosen": -286.58148193359375, "logps/rejected": -295.5797424316406, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": 0.99732506275177, "rewards/margins": 8.176431655883789, "rewards/rejected": -7.179105281829834, "step": 2860 }, { "epoch": 1.48, "learning_rate": 2.812201185695162e-07, "logits/chosen": -2.7098686695098877, "logits/rejected": -2.783830165863037, "logps/chosen": -197.52723693847656, "logps/rejected": -286.0131530761719, "loss": 0.079, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.9220987558364868, "rewards/margins": 8.922266006469727, "rewards/rejected": -7.000166416168213, "step": 2870 }, { "epoch": 1.49, "learning_rate": 2.802639127940333e-07, "logits/chosen": -2.8137047290802, "logits/rejected": -2.7263553142547607, "logps/chosen": -272.9234619140625, "logps/rejected": -223.7790985107422, "loss": 0.089, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.8738445043563843, "rewards/margins": 10.640775680541992, "rewards/rejected": -8.76693058013916, "step": 2880 }, { "epoch": 1.49, "learning_rate": 2.7930770701855036e-07, "logits/chosen": -2.8235201835632324, "logits/rejected": -2.8233487606048584, "logps/chosen": -281.5433044433594, "logps/rejected": -245.4292449951172, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": 2.013465642929077, "rewards/margins": 9.75710678100586, "rewards/rejected": -7.743639945983887, "step": 2890 }, { "epoch": 1.5, "learning_rate": 2.783515012430675e-07, "logits/chosen": -2.6952970027923584, "logits/rejected": -2.6778531074523926, "logps/chosen": -227.57217407226562, "logps/rejected": -233.82666015625, "loss": 0.1159, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5577604174613953, "rewards/margins": 6.89922571182251, "rewards/rejected": -6.341464996337891, "step": 2900 }, { "epoch": 1.5, "learning_rate": 2.773952954675846e-07, "logits/chosen": -2.8005638122558594, "logits/rejected": -2.869779109954834, "logps/chosen": -288.06671142578125, "logps/rejected": -220.03268432617188, "loss": 0.0974, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.9301376342773438, "rewards/margins": 8.675138473510742, "rewards/rejected": -6.74500036239624, "step": 2910 }, { "epoch": 1.51, "learning_rate": 2.764390896921017e-07, "logits/chosen": -2.7519824504852295, "logits/rejected": -2.7377419471740723, "logps/chosen": -290.820068359375, "logps/rejected": -291.615966796875, "loss": 0.1115, "rewards/accuracies": 1.0, "rewards/chosen": 4.328765392303467, "rewards/margins": 12.472002029418945, "rewards/rejected": -8.143235206604004, "step": 2920 }, { "epoch": 1.51, "learning_rate": 2.754828839166188e-07, "logits/chosen": -2.7895121574401855, "logits/rejected": -2.7734270095825195, "logps/chosen": -232.82510375976562, "logps/rejected": -227.81820678710938, "loss": 0.1025, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.8885366916656494, "rewards/margins": 10.8139009475708, "rewards/rejected": -7.925364017486572, "step": 2930 }, { "epoch": 1.52, "learning_rate": 2.7452667814113594e-07, "logits/chosen": -2.8098020553588867, "logits/rejected": -2.9195146560668945, "logps/chosen": -235.73355102539062, "logps/rejected": -235.16580200195312, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.9349721074104309, "rewards/margins": 6.663336277008057, "rewards/rejected": -5.72836446762085, "step": 2940 }, { "epoch": 1.52, "learning_rate": 2.7357047236565306e-07, "logits/chosen": -2.683117389678955, "logits/rejected": -2.7429752349853516, "logps/chosen": -268.73150634765625, "logps/rejected": -295.7161560058594, "loss": 0.07, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.1788768768310547, "rewards/margins": 8.738747596740723, "rewards/rejected": -6.559870719909668, "step": 2950 }, { "epoch": 1.53, "learning_rate": 2.7261426659017017e-07, "logits/chosen": -2.747555732727051, "logits/rejected": -2.8072476387023926, "logps/chosen": -287.62353515625, "logps/rejected": -246.19345092773438, "loss": 0.0784, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.1814064979553223, "rewards/margins": 8.878744125366211, "rewards/rejected": -6.6973371505737305, "step": 2960 }, { "epoch": 1.53, "learning_rate": 2.716580608146873e-07, "logits/chosen": -2.7727088928222656, "logits/rejected": -2.7909743785858154, "logps/chosen": -330.4422607421875, "logps/rejected": -276.64410400390625, "loss": 0.0662, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.657055377960205, "rewards/margins": 11.456523895263672, "rewards/rejected": -7.799468994140625, "step": 2970 }, { "epoch": 1.54, "learning_rate": 2.7070185503920446e-07, "logits/chosen": -2.5788490772247314, "logits/rejected": -2.6384220123291016, "logps/chosen": -245.68936157226562, "logps/rejected": -298.78729248046875, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": 2.223114252090454, "rewards/margins": 10.831456184387207, "rewards/rejected": -8.608342170715332, "step": 2980 }, { "epoch": 1.54, "learning_rate": 2.6974564926372157e-07, "logits/chosen": -2.7926948070526123, "logits/rejected": -2.7871780395507812, "logps/chosen": -282.8335876464844, "logps/rejected": -262.1839904785156, "loss": 0.0729, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.1277525424957275, "rewards/margins": 8.252248764038086, "rewards/rejected": -7.124497413635254, "step": 2990 }, { "epoch": 1.55, "learning_rate": 2.687894434882387e-07, "logits/chosen": -2.743727684020996, "logits/rejected": -2.712907075881958, "logps/chosen": -246.3362274169922, "logps/rejected": -242.5818328857422, "loss": 0.0617, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.10309407860040665, "rewards/margins": 6.269896984100342, "rewards/rejected": -6.372990608215332, "step": 3000 }, { "epoch": 1.55, "eval_logits/chosen": -2.768277168273926, "eval_logits/rejected": -2.7743842601776123, "eval_logps/chosen": -304.76513671875, "eval_logps/rejected": -264.7488098144531, "eval_loss": 0.6421293020248413, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.2551521062850952, "eval_rewards/margins": 2.5061919689178467, "eval_rewards/rejected": -3.7613439559936523, "eval_runtime": 217.9227, "eval_samples_per_second": 9.178, "eval_steps_per_second": 0.289, "step": 3000 }, { "epoch": 1.55, "learning_rate": 2.678332377127558e-07, "logits/chosen": -2.7960705757141113, "logits/rejected": -2.8275809288024902, "logps/chosen": -271.56878662109375, "logps/rejected": -254.76614379882812, "loss": 0.0561, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.7444472312927246, "rewards/margins": 10.54531478881836, "rewards/rejected": -7.800868034362793, "step": 3010 }, { "epoch": 1.56, "learning_rate": 2.668770319372729e-07, "logits/chosen": -2.6941845417022705, "logits/rejected": -2.7289962768554688, "logps/chosen": -231.16708374023438, "logps/rejected": -205.00234985351562, "loss": 0.0639, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.4271122217178345, "rewards/margins": 7.393969535827637, "rewards/rejected": -5.966856479644775, "step": 3020 }, { "epoch": 1.56, "learning_rate": 2.6592082616179004e-07, "logits/chosen": -2.8552334308624268, "logits/rejected": -2.7763783931732178, "logps/chosen": -204.55636596679688, "logps/rejected": -252.6420440673828, "loss": 0.087, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9946534037590027, "rewards/margins": 8.126154899597168, "rewards/rejected": -7.1315016746521, "step": 3030 }, { "epoch": 1.57, "learning_rate": 2.649646203863071e-07, "logits/chosen": -2.7002310752868652, "logits/rejected": -2.6688289642333984, "logps/chosen": -275.127685546875, "logps/rejected": -310.6485290527344, "loss": 0.0692, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.3441951274871826, "rewards/margins": 9.95146656036377, "rewards/rejected": -7.607271671295166, "step": 3040 }, { "epoch": 1.57, "learning_rate": 2.640084146108242e-07, "logits/chosen": -2.8274757862091064, "logits/rejected": -2.7921900749206543, "logps/chosen": -322.8690185546875, "logps/rejected": -258.6299133300781, "loss": 0.0643, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.8817846775054932, "rewards/margins": 10.3388671875, "rewards/rejected": -8.45708179473877, "step": 3050 }, { "epoch": 1.58, "learning_rate": 2.6305220883534133e-07, "logits/chosen": -2.8685293197631836, "logits/rejected": -2.913038969039917, "logps/chosen": -263.8501281738281, "logps/rejected": -243.12026977539062, "loss": 0.0602, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.6231520175933838, "rewards/margins": 7.873045444488525, "rewards/rejected": -6.249893665313721, "step": 3060 }, { "epoch": 1.58, "learning_rate": 2.6209600305985845e-07, "logits/chosen": -2.801408052444458, "logits/rejected": -2.7723915576934814, "logps/chosen": -278.9658203125, "logps/rejected": -317.4752197265625, "loss": 0.0815, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.718047857284546, "rewards/margins": 14.852376937866211, "rewards/rejected": -12.134328842163086, "step": 3070 }, { "epoch": 1.59, "learning_rate": 2.6113979728437556e-07, "logits/chosen": -2.9154725074768066, "logits/rejected": -2.9344992637634277, "logps/chosen": -283.52862548828125, "logps/rejected": -295.277099609375, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": 2.83547043800354, "rewards/margins": 10.664634704589844, "rewards/rejected": -7.829165458679199, "step": 3080 }, { "epoch": 1.6, "learning_rate": 2.601835915088927e-07, "logits/chosen": -2.7709438800811768, "logits/rejected": -2.8170695304870605, "logps/chosen": -264.36090087890625, "logps/rejected": -258.5511169433594, "loss": 0.1273, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.3656535148620605, "rewards/margins": 9.661222457885742, "rewards/rejected": -7.29556941986084, "step": 3090 }, { "epoch": 1.6, "learning_rate": 2.592273857334098e-07, "logits/chosen": -2.757802963256836, "logits/rejected": -2.804326057434082, "logps/chosen": -237.77841186523438, "logps/rejected": -249.50381469726562, "loss": 0.1111, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.2772223949432373, "rewards/margins": 10.901510238647461, "rewards/rejected": -7.624286651611328, "step": 3100 }, { "epoch": 1.61, "learning_rate": 2.582711799579269e-07, "logits/chosen": -2.8544392585754395, "logits/rejected": -2.903831720352173, "logps/chosen": -302.3487854003906, "logps/rejected": -274.150390625, "loss": 0.0762, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.2709031105041504, "rewards/margins": 10.55983829498291, "rewards/rejected": -8.288934707641602, "step": 3110 }, { "epoch": 1.61, "learning_rate": 2.573149741824441e-07, "logits/chosen": -2.878246784210205, "logits/rejected": -2.8712260723114014, "logps/chosen": -292.5808410644531, "logps/rejected": -244.95993041992188, "loss": 0.0604, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.7662618160247803, "rewards/margins": 9.28374195098877, "rewards/rejected": -7.517480373382568, "step": 3120 }, { "epoch": 1.62, "learning_rate": 2.563587684069612e-07, "logits/chosen": -2.818538188934326, "logits/rejected": -2.8021979331970215, "logps/chosen": -297.29669189453125, "logps/rejected": -280.97515869140625, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 1.8910287618637085, "rewards/margins": 9.954824447631836, "rewards/rejected": -8.063794136047363, "step": 3130 }, { "epoch": 1.62, "learning_rate": 2.554025626314783e-07, "logits/chosen": -2.8551039695739746, "logits/rejected": -2.8618719577789307, "logps/chosen": -267.74383544921875, "logps/rejected": -318.0247497558594, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": 1.0268582105636597, "rewards/margins": 9.777185440063477, "rewards/rejected": -8.750327110290527, "step": 3140 }, { "epoch": 1.63, "learning_rate": 2.544463568559954e-07, "logits/chosen": -2.856924057006836, "logits/rejected": -2.859058380126953, "logps/chosen": -299.3072204589844, "logps/rejected": -320.89056396484375, "loss": 0.0578, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.0914320945739746, "rewards/margins": 11.473106384277344, "rewards/rejected": -9.381673812866211, "step": 3150 }, { "epoch": 1.63, "learning_rate": 2.5349015108051254e-07, "logits/chosen": -2.784090518951416, "logits/rejected": -2.8050949573516846, "logps/chosen": -272.7767333984375, "logps/rejected": -272.4282531738281, "loss": 0.067, "rewards/accuracies": 1.0, "rewards/chosen": 1.9306879043579102, "rewards/margins": 10.905633926391602, "rewards/rejected": -8.974946975708008, "step": 3160 }, { "epoch": 1.64, "learning_rate": 2.5253394530502966e-07, "logits/chosen": -2.6712114810943604, "logits/rejected": -2.8075413703918457, "logps/chosen": -356.3838195800781, "logps/rejected": -310.9335021972656, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": 2.7382097244262695, "rewards/margins": 10.840738296508789, "rewards/rejected": -8.102529525756836, "step": 3170 }, { "epoch": 1.64, "learning_rate": 2.5157773952954677e-07, "logits/chosen": -2.894787311553955, "logits/rejected": -2.7637925148010254, "logps/chosen": -287.5474853515625, "logps/rejected": -285.1513977050781, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": 1.372936487197876, "rewards/margins": 11.11128044128418, "rewards/rejected": -9.738344192504883, "step": 3180 }, { "epoch": 1.65, "learning_rate": 2.506215337540639e-07, "logits/chosen": -2.830756425857544, "logits/rejected": -2.7069759368896484, "logps/chosen": -289.5176086425781, "logps/rejected": -309.98321533203125, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": 1.2456578016281128, "rewards/margins": 9.056872367858887, "rewards/rejected": -7.811214447021484, "step": 3190 }, { "epoch": 1.65, "learning_rate": 2.4966532797858095e-07, "logits/chosen": -2.6839346885681152, "logits/rejected": -2.648435592651367, "logps/chosen": -281.0553283691406, "logps/rejected": -281.7121276855469, "loss": 0.0623, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.267576217651367, "rewards/margins": 12.378862380981445, "rewards/rejected": -10.111286163330078, "step": 3200 }, { "epoch": 1.66, "learning_rate": 2.4870912220309807e-07, "logits/chosen": -2.7548084259033203, "logits/rejected": -2.929920196533203, "logps/chosen": -265.11920166015625, "logps/rejected": -276.0952453613281, "loss": 0.085, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.494207739830017, "rewards/margins": 7.784449577331543, "rewards/rejected": -6.290242671966553, "step": 3210 }, { "epoch": 1.66, "learning_rate": 2.477529164276152e-07, "logits/chosen": -2.8512723445892334, "logits/rejected": -2.8269026279449463, "logps/chosen": -289.71673583984375, "logps/rejected": -290.6452331542969, "loss": 0.1119, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.928057074546814, "rewards/margins": 9.863795280456543, "rewards/rejected": -7.935737609863281, "step": 3220 }, { "epoch": 1.67, "learning_rate": 2.4679671065213235e-07, "logits/chosen": -2.726294994354248, "logits/rejected": -2.779247999191284, "logps/chosen": -300.8678894042969, "logps/rejected": -243.9254608154297, "loss": 0.0955, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.6631157398223877, "rewards/margins": 8.425924301147461, "rewards/rejected": -6.762808322906494, "step": 3230 }, { "epoch": 1.67, "learning_rate": 2.4584050487664947e-07, "logits/chosen": -2.7843034267425537, "logits/rejected": -2.866281509399414, "logps/chosen": -301.68731689453125, "logps/rejected": -259.0189514160156, "loss": 0.051, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.0484724044799805, "rewards/margins": 12.256793022155762, "rewards/rejected": -8.208322525024414, "step": 3240 }, { "epoch": 1.68, "learning_rate": 2.448842991011666e-07, "logits/chosen": -2.700822591781616, "logits/rejected": -2.7363715171813965, "logps/chosen": -268.10528564453125, "logps/rejected": -271.51776123046875, "loss": 0.1079, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.7115895748138428, "rewards/margins": 9.137028694152832, "rewards/rejected": -7.42543888092041, "step": 3250 }, { "epoch": 1.68, "learning_rate": 2.439280933256837e-07, "logits/chosen": -2.688324451446533, "logits/rejected": -2.728954792022705, "logps/chosen": -246.6923065185547, "logps/rejected": -309.48809814453125, "loss": 0.0476, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.557828664779663, "rewards/margins": 11.568731307983398, "rewards/rejected": -9.010904312133789, "step": 3260 }, { "epoch": 1.69, "learning_rate": 2.429718875502008e-07, "logits/chosen": -2.471862316131592, "logits/rejected": -2.5184130668640137, "logps/chosen": -242.50222778320312, "logps/rejected": -264.163818359375, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": 1.2356094121932983, "rewards/margins": 8.016067504882812, "rewards/rejected": -6.780457973480225, "step": 3270 }, { "epoch": 1.69, "learning_rate": 2.420156817747179e-07, "logits/chosen": -2.7138893604278564, "logits/rejected": -2.7802371978759766, "logps/chosen": -322.5417785644531, "logps/rejected": -268.6466064453125, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 2.545907497406006, "rewards/margins": 9.485400199890137, "rewards/rejected": -6.939493656158447, "step": 3280 }, { "epoch": 1.7, "learning_rate": 2.41059475999235e-07, "logits/chosen": -2.6953110694885254, "logits/rejected": -2.6134727001190186, "logps/chosen": -237.255126953125, "logps/rejected": -310.57427978515625, "loss": 0.0726, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.1554365158081055, "rewards/margins": 11.051652908325195, "rewards/rejected": -9.896215438842773, "step": 3290 }, { "epoch": 1.7, "learning_rate": 2.4010327022375216e-07, "logits/chosen": -2.745811939239502, "logits/rejected": -2.7188100814819336, "logps/chosen": -318.18414306640625, "logps/rejected": -325.09307861328125, "loss": 0.0684, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.7801249027252197, "rewards/margins": 11.05907917022705, "rewards/rejected": -9.278955459594727, "step": 3300 }, { "epoch": 1.71, "learning_rate": 2.391470644482693e-07, "logits/chosen": -2.652722120285034, "logits/rejected": -2.693530321121216, "logps/chosen": -273.9671936035156, "logps/rejected": -266.43548583984375, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": 1.9491304159164429, "rewards/margins": 9.380148887634277, "rewards/rejected": -7.431018829345703, "step": 3310 }, { "epoch": 1.71, "learning_rate": 2.3819085867278636e-07, "logits/chosen": -2.6838040351867676, "logits/rejected": -2.68790864944458, "logps/chosen": -178.25857543945312, "logps/rejected": -211.74545288085938, "loss": 0.0883, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.007467269897461, "rewards/margins": 9.49828052520752, "rewards/rejected": -8.490812301635742, "step": 3320 }, { "epoch": 1.72, "learning_rate": 2.3723465289730348e-07, "logits/chosen": -2.776559352874756, "logits/rejected": -2.745678424835205, "logps/chosen": -306.03582763671875, "logps/rejected": -242.4042510986328, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/chosen": 0.9364816546440125, "rewards/margins": 7.912406921386719, "rewards/rejected": -6.975924491882324, "step": 3330 }, { "epoch": 1.72, "learning_rate": 2.362784471218206e-07, "logits/chosen": -2.720956325531006, "logits/rejected": -2.714653491973877, "logps/chosen": -295.54034423828125, "logps/rejected": -292.66986083984375, "loss": 0.0578, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.876239061355591, "rewards/margins": 11.462187767028809, "rewards/rejected": -8.58594799041748, "step": 3340 }, { "epoch": 1.73, "learning_rate": 2.353222413463377e-07, "logits/chosen": -2.737004518508911, "logits/rejected": -2.8274214267730713, "logps/chosen": -223.5535125732422, "logps/rejected": -261.5608215332031, "loss": 0.0541, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.330844521522522, "rewards/margins": 9.457793235778809, "rewards/rejected": -8.126947402954102, "step": 3350 }, { "epoch": 1.73, "learning_rate": 2.3436603557085483e-07, "logits/chosen": -2.814901351928711, "logits/rejected": -2.721123218536377, "logps/chosen": -261.89306640625, "logps/rejected": -271.2594909667969, "loss": 0.0962, "rewards/accuracies": 0.875, "rewards/chosen": 0.48820918798446655, "rewards/margins": 7.547884941101074, "rewards/rejected": -7.059675693511963, "step": 3360 }, { "epoch": 1.74, "learning_rate": 2.3340982979537197e-07, "logits/chosen": -2.756761074066162, "logits/rejected": -2.788041830062866, "logps/chosen": -338.298583984375, "logps/rejected": -325.39788818359375, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": 2.7895655632019043, "rewards/margins": 11.199186325073242, "rewards/rejected": -8.40962028503418, "step": 3370 }, { "epoch": 1.74, "learning_rate": 2.3245362401988909e-07, "logits/chosen": -2.832690715789795, "logits/rejected": -2.772152900695801, "logps/chosen": -295.143310546875, "logps/rejected": -312.2070617675781, "loss": 0.0745, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.858865261077881, "rewards/margins": 12.961053848266602, "rewards/rejected": -10.102187156677246, "step": 3380 }, { "epoch": 1.75, "learning_rate": 2.314974182444062e-07, "logits/chosen": -2.787862777709961, "logits/rejected": -2.790987730026245, "logps/chosen": -249.3990020751953, "logps/rejected": -269.93011474609375, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": 0.25441741943359375, "rewards/margins": 9.50505256652832, "rewards/rejected": -9.250636100769043, "step": 3390 }, { "epoch": 1.76, "learning_rate": 2.305412124689233e-07, "logits/chosen": -2.619658946990967, "logits/rejected": -2.61407470703125, "logps/chosen": -305.6173095703125, "logps/rejected": -324.3746032714844, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": 3.354332685470581, "rewards/margins": 13.424585342407227, "rewards/rejected": -10.070249557495117, "step": 3400 }, { "epoch": 1.76, "learning_rate": 2.295850066934404e-07, "logits/chosen": -2.713583469390869, "logits/rejected": -2.6319689750671387, "logps/chosen": -251.8077392578125, "logps/rejected": -264.23101806640625, "loss": 0.0685, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20004582405090332, "rewards/margins": 8.175451278686523, "rewards/rejected": -8.375497817993164, "step": 3410 }, { "epoch": 1.77, "learning_rate": 2.2862880091795752e-07, "logits/chosen": -2.6867470741271973, "logits/rejected": -2.723292350769043, "logps/chosen": -252.469482421875, "logps/rejected": -334.07525634765625, "loss": 0.0726, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.7798032760620117, "rewards/margins": 11.993390083312988, "rewards/rejected": -10.213586807250977, "step": 3420 }, { "epoch": 1.77, "learning_rate": 2.2767259514247464e-07, "logits/chosen": -2.610609292984009, "logits/rejected": -2.564879894256592, "logps/chosen": -269.3361511230469, "logps/rejected": -295.60821533203125, "loss": 0.0734, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.9466493129730225, "rewards/margins": 10.422674179077148, "rewards/rejected": -8.476022720336914, "step": 3430 }, { "epoch": 1.78, "learning_rate": 2.2671638936699178e-07, "logits/chosen": -2.499741315841675, "logits/rejected": -2.4898147583007812, "logps/chosen": -280.90838623046875, "logps/rejected": -286.2072448730469, "loss": 0.0726, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.3896098136901855, "rewards/margins": 10.770309448242188, "rewards/rejected": -8.380699157714844, "step": 3440 }, { "epoch": 1.78, "learning_rate": 2.257601835915089e-07, "logits/chosen": -2.6703405380249023, "logits/rejected": -2.68867826461792, "logps/chosen": -284.33984375, "logps/rejected": -301.51214599609375, "loss": 0.0734, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.9578365087509155, "rewards/margins": 10.313423156738281, "rewards/rejected": -8.35558795928955, "step": 3450 }, { "epoch": 1.79, "learning_rate": 2.24803977816026e-07, "logits/chosen": -2.575701951980591, "logits/rejected": -2.7522482872009277, "logps/chosen": -222.56655883789062, "logps/rejected": -213.4800262451172, "loss": 0.1307, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.8081939220428467, "rewards/margins": 10.595215797424316, "rewards/rejected": -8.787023544311523, "step": 3460 }, { "epoch": 1.79, "learning_rate": 2.2384777204054313e-07, "logits/chosen": -2.7906508445739746, "logits/rejected": -2.7896149158477783, "logps/chosen": -276.54205322265625, "logps/rejected": -303.80804443359375, "loss": 0.1239, "rewards/accuracies": 1.0, "rewards/chosen": 4.196168422698975, "rewards/margins": 13.602836608886719, "rewards/rejected": -9.406667709350586, "step": 3470 }, { "epoch": 1.8, "learning_rate": 2.2289156626506022e-07, "logits/chosen": -2.6736395359039307, "logits/rejected": -2.5446834564208984, "logps/chosen": -238.1494598388672, "logps/rejected": -265.4019470214844, "loss": 0.0708, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.9061222076416016, "rewards/margins": 9.534143447875977, "rewards/rejected": -7.6280198097229, "step": 3480 }, { "epoch": 1.8, "learning_rate": 2.2193536048957733e-07, "logits/chosen": -2.764954090118408, "logits/rejected": -2.8380179405212402, "logps/chosen": -241.90158081054688, "logps/rejected": -259.67919921875, "loss": 0.0755, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5641316175460815, "rewards/margins": 8.558606147766113, "rewards/rejected": -6.994473934173584, "step": 3490 }, { "epoch": 1.81, "learning_rate": 2.2097915471409445e-07, "logits/chosen": -2.772516965866089, "logits/rejected": -2.728278398513794, "logps/chosen": -304.7802734375, "logps/rejected": -286.6618347167969, "loss": 0.0765, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.4173293113708496, "rewards/margins": 10.533430099487305, "rewards/rejected": -8.116101264953613, "step": 3500 }, { "epoch": 1.81, "eval_logits/chosen": -2.738905668258667, "eval_logits/rejected": -2.740257501602173, "eval_logps/chosen": -304.2354431152344, "eval_logps/rejected": -266.13909912109375, "eval_loss": 0.6581782102584839, "eval_rewards/accuracies": 0.7658730149269104, "eval_rewards/chosen": -1.149203896522522, "eval_rewards/margins": 2.890200614929199, "eval_rewards/rejected": -4.03940486907959, "eval_runtime": 217.4993, "eval_samples_per_second": 9.195, "eval_steps_per_second": 0.29, "step": 3500 }, { "epoch": 1.81, "learning_rate": 2.200229489386116e-07, "logits/chosen": -2.7111144065856934, "logits/rejected": -2.670027017593384, "logps/chosen": -250.5125732421875, "logps/rejected": -249.6392364501953, "loss": 0.0825, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3286582231521606, "rewards/margins": 8.792272567749023, "rewards/rejected": -7.463613986968994, "step": 3510 }, { "epoch": 1.82, "learning_rate": 2.190667431631287e-07, "logits/chosen": -2.7528154850006104, "logits/rejected": -2.7122788429260254, "logps/chosen": -278.01910400390625, "logps/rejected": -313.9847106933594, "loss": 0.0943, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.2548537254333496, "rewards/margins": 11.679734230041504, "rewards/rejected": -9.424880027770996, "step": 3520 }, { "epoch": 1.82, "learning_rate": 2.1811053738764582e-07, "logits/chosen": -2.5960257053375244, "logits/rejected": -2.6290123462677, "logps/chosen": -204.6955108642578, "logps/rejected": -282.3271179199219, "loss": 0.0602, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.0323585271835327, "rewards/margins": 8.045377731323242, "rewards/rejected": -7.013019561767578, "step": 3530 }, { "epoch": 1.83, "learning_rate": 2.1715433161216294e-07, "logits/chosen": -2.7657580375671387, "logits/rejected": -2.744833469390869, "logps/chosen": -288.4944152832031, "logps/rejected": -280.72955322265625, "loss": 0.055, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.4996562004089355, "rewards/margins": 10.434306144714355, "rewards/rejected": -7.9346513748168945, "step": 3540 }, { "epoch": 1.83, "learning_rate": 2.1619812583668005e-07, "logits/chosen": -2.682262897491455, "logits/rejected": -2.7337307929992676, "logps/chosen": -257.96453857421875, "logps/rejected": -262.64801025390625, "loss": 0.0884, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6263707876205444, "rewards/margins": 8.416780471801758, "rewards/rejected": -7.790409088134766, "step": 3550 }, { "epoch": 1.84, "learning_rate": 2.1524192006119714e-07, "logits/chosen": -2.66699481010437, "logits/rejected": -2.7092580795288086, "logps/chosen": -260.70819091796875, "logps/rejected": -278.57171630859375, "loss": 0.067, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16346995532512665, "rewards/margins": 7.7417144775390625, "rewards/rejected": -7.578243255615234, "step": 3560 }, { "epoch": 1.84, "learning_rate": 2.1428571428571426e-07, "logits/chosen": -2.829007863998413, "logits/rejected": -2.7939159870147705, "logps/chosen": -281.33416748046875, "logps/rejected": -275.6802978515625, "loss": 0.0438, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2156023979187012, "rewards/margins": 9.397723197937012, "rewards/rejected": -8.182120323181152, "step": 3570 }, { "epoch": 1.85, "learning_rate": 2.133295085102314e-07, "logits/chosen": -2.5332906246185303, "logits/rejected": -2.5238547325134277, "logps/chosen": -265.1497497558594, "logps/rejected": -257.6131591796875, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": 1.2848436832427979, "rewards/margins": 10.311678886413574, "rewards/rejected": -9.026835441589355, "step": 3580 }, { "epoch": 1.85, "learning_rate": 2.1237330273474851e-07, "logits/chosen": -2.7879021167755127, "logits/rejected": -2.816457509994507, "logps/chosen": -330.1148986816406, "logps/rejected": -284.9937438964844, "loss": 0.0694, "rewards/accuracies": 1.0, "rewards/chosen": 1.7244058847427368, "rewards/margins": 9.594953536987305, "rewards/rejected": -7.870547294616699, "step": 3590 }, { "epoch": 1.86, "learning_rate": 2.1141709695926563e-07, "logits/chosen": -2.7168538570404053, "logits/rejected": -2.7340972423553467, "logps/chosen": -250.33151245117188, "logps/rejected": -226.6660614013672, "loss": 0.0641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 1.10770583152771, "rewards/margins": 7.989068508148193, "rewards/rejected": -6.881363868713379, "step": 3600 }, { "epoch": 1.86, "learning_rate": 2.1046089118378275e-07, "logits/chosen": -2.746110439300537, "logits/rejected": -2.7883448600769043, "logps/chosen": -253.4336395263672, "logps/rejected": -300.6252136230469, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": 1.5055543184280396, "rewards/margins": 11.491781234741211, "rewards/rejected": -9.986227035522461, "step": 3610 }, { "epoch": 1.87, "learning_rate": 2.0950468540829986e-07, "logits/chosen": -2.7674827575683594, "logits/rejected": -2.760319471359253, "logps/chosen": -262.53131103515625, "logps/rejected": -249.01333618164062, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": 0.8487039804458618, "rewards/margins": 9.199454307556152, "rewards/rejected": -8.350749969482422, "step": 3620 }, { "epoch": 1.87, "learning_rate": 2.0854847963281698e-07, "logits/chosen": -2.604976177215576, "logits/rejected": -2.541404962539673, "logps/chosen": -246.70480346679688, "logps/rejected": -250.37417602539062, "loss": 0.0496, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.391180396080017, "rewards/margins": 10.575980186462402, "rewards/rejected": -9.184799194335938, "step": 3630 }, { "epoch": 1.88, "learning_rate": 2.0759227385733407e-07, "logits/chosen": -2.5545432567596436, "logits/rejected": -2.6954281330108643, "logps/chosen": -311.905029296875, "logps/rejected": -267.7748107910156, "loss": 0.0806, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.2542879581451416, "rewards/margins": 9.12138557434082, "rewards/rejected": -7.8670973777771, "step": 3640 }, { "epoch": 1.88, "learning_rate": 2.066360680818512e-07, "logits/chosen": -2.7910752296447754, "logits/rejected": -2.8306946754455566, "logps/chosen": -359.8431091308594, "logps/rejected": -279.3539733886719, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/chosen": 2.022540330886841, "rewards/margins": 10.497331619262695, "rewards/rejected": -8.47479248046875, "step": 3650 }, { "epoch": 1.89, "learning_rate": 2.0567986230636832e-07, "logits/chosen": -2.7704267501831055, "logits/rejected": -2.77883243560791, "logps/chosen": -235.0912628173828, "logps/rejected": -271.0329895019531, "loss": 0.1054, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.019496440887451, "rewards/margins": 10.298086166381836, "rewards/rejected": -8.278589248657227, "step": 3660 }, { "epoch": 1.89, "learning_rate": 2.0472365653088544e-07, "logits/chosen": -2.720661163330078, "logits/rejected": -2.725130558013916, "logps/chosen": -275.77752685546875, "logps/rejected": -312.16265869140625, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": 0.19810207188129425, "rewards/margins": 8.972552299499512, "rewards/rejected": -8.774450302124023, "step": 3670 }, { "epoch": 1.9, "learning_rate": 2.0376745075540256e-07, "logits/chosen": -2.833644390106201, "logits/rejected": -2.8849120140075684, "logps/chosen": -320.95904541015625, "logps/rejected": -268.92987060546875, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": 1.2254230976104736, "rewards/margins": 9.793492317199707, "rewards/rejected": -8.568068504333496, "step": 3680 }, { "epoch": 1.91, "learning_rate": 2.0281124497991967e-07, "logits/chosen": -2.8426222801208496, "logits/rejected": -2.813960313796997, "logps/chosen": -240.68954467773438, "logps/rejected": -312.35589599609375, "loss": 0.0968, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.614633560180664, "rewards/margins": 11.83614444732666, "rewards/rejected": -9.22150993347168, "step": 3690 }, { "epoch": 1.91, "learning_rate": 2.018550392044368e-07, "logits/chosen": -2.8380324840545654, "logits/rejected": -2.798767566680908, "logps/chosen": -294.00396728515625, "logps/rejected": -365.79522705078125, "loss": 0.0533, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.709501266479492, "rewards/margins": 12.373403549194336, "rewards/rejected": -9.663900375366211, "step": 3700 }, { "epoch": 1.92, "learning_rate": 2.0089883342895388e-07, "logits/chosen": -2.806723117828369, "logits/rejected": -2.939481258392334, "logps/chosen": -275.45538330078125, "logps/rejected": -234.10690307617188, "loss": 0.0738, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2928574085235596, "rewards/margins": 9.613503456115723, "rewards/rejected": -8.320646286010742, "step": 3710 }, { "epoch": 1.92, "learning_rate": 1.9994262765347102e-07, "logits/chosen": -2.764084815979004, "logits/rejected": -2.790250301361084, "logps/chosen": -277.35833740234375, "logps/rejected": -220.1525421142578, "loss": 0.0817, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.888009250164032, "rewards/margins": 8.029479026794434, "rewards/rejected": -7.141470432281494, "step": 3720 }, { "epoch": 1.93, "learning_rate": 1.9898642187798813e-07, "logits/chosen": -2.640573501586914, "logits/rejected": -2.656597137451172, "logps/chosen": -292.8527526855469, "logps/rejected": -301.4468688964844, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": 2.1724305152893066, "rewards/margins": 11.529372215270996, "rewards/rejected": -9.356943130493164, "step": 3730 }, { "epoch": 1.93, "learning_rate": 1.9803021610250525e-07, "logits/chosen": -2.7659599781036377, "logits/rejected": -2.8064372539520264, "logps/chosen": -246.72659301757812, "logps/rejected": -294.17572021484375, "loss": 0.0785, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.036099672317505, "rewards/margins": 11.232912063598633, "rewards/rejected": -9.196812629699707, "step": 3740 }, { "epoch": 1.94, "learning_rate": 1.9707401032702237e-07, "logits/chosen": -2.65791916847229, "logits/rejected": -2.7072339057922363, "logps/chosen": -232.99755859375, "logps/rejected": -272.58465576171875, "loss": 0.0484, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.425924301147461, "rewards/margins": 10.325573921203613, "rewards/rejected": -8.899649620056152, "step": 3750 }, { "epoch": 1.94, "learning_rate": 1.9611780455153948e-07, "logits/chosen": -2.799206256866455, "logits/rejected": -2.832388401031494, "logps/chosen": -259.90631103515625, "logps/rejected": -268.10015869140625, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/chosen": 2.051642894744873, "rewards/margins": 10.926515579223633, "rewards/rejected": -8.874872207641602, "step": 3760 }, { "epoch": 1.95, "learning_rate": 1.951615987760566e-07, "logits/chosen": -2.5179781913757324, "logits/rejected": -2.6008613109588623, "logps/chosen": -233.42056274414062, "logps/rejected": -251.9779815673828, "loss": 0.063, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4882890582084656, "rewards/margins": 8.425145149230957, "rewards/rejected": -7.936856269836426, "step": 3770 }, { "epoch": 1.95, "learning_rate": 1.942053930005737e-07, "logits/chosen": -2.9028313159942627, "logits/rejected": -2.8197319507598877, "logps/chosen": -275.57598876953125, "logps/rejected": -298.7084045410156, "loss": 0.0418, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8865839242935181, "rewards/margins": 10.174108505249023, "rewards/rejected": -9.287524223327637, "step": 3780 }, { "epoch": 1.96, "learning_rate": 1.9324918722509086e-07, "logits/chosen": -2.688558578491211, "logits/rejected": -2.711604356765747, "logps/chosen": -250.48641967773438, "logps/rejected": -235.5607147216797, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": 0.9537031054496765, "rewards/margins": 8.950319290161133, "rewards/rejected": -7.996614933013916, "step": 3790 }, { "epoch": 1.96, "learning_rate": 1.9229298144960794e-07, "logits/chosen": -2.6265201568603516, "logits/rejected": -2.643637180328369, "logps/chosen": -274.25970458984375, "logps/rejected": -283.9968566894531, "loss": 0.0796, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.4979665279388428, "rewards/margins": 10.394436836242676, "rewards/rejected": -8.896470069885254, "step": 3800 }, { "epoch": 1.97, "learning_rate": 1.9133677567412506e-07, "logits/chosen": -2.630946159362793, "logits/rejected": -2.723823070526123, "logps/chosen": -282.9399108886719, "logps/rejected": -233.25167846679688, "loss": 0.1037, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.8352066278457642, "rewards/margins": 8.478796005249023, "rewards/rejected": -6.643589973449707, "step": 3810 }, { "epoch": 1.97, "learning_rate": 1.9038056989864218e-07, "logits/chosen": -2.7855098247528076, "logits/rejected": -2.7989068031311035, "logps/chosen": -274.2118835449219, "logps/rejected": -282.32708740234375, "loss": 0.0916, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.546886920928955, "rewards/margins": 10.067007064819336, "rewards/rejected": -7.520121097564697, "step": 3820 }, { "epoch": 1.98, "learning_rate": 1.894243641231593e-07, "logits/chosen": -2.4910149574279785, "logits/rejected": -2.501765727996826, "logps/chosen": -235.5742950439453, "logps/rejected": -227.42919921875, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": 0.2598899006843567, "rewards/margins": 7.640264987945557, "rewards/rejected": -7.380374908447266, "step": 3830 }, { "epoch": 1.98, "learning_rate": 1.884681583476764e-07, "logits/chosen": -2.649984836578369, "logits/rejected": -2.794214963912964, "logps/chosen": -231.7446746826172, "logps/rejected": -270.55938720703125, "loss": 0.0427, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4616106152534485, "rewards/margins": 7.806038856506348, "rewards/rejected": -7.344427585601807, "step": 3840 }, { "epoch": 1.99, "learning_rate": 1.8751195257219352e-07, "logits/chosen": -2.6904454231262207, "logits/rejected": -2.7609245777130127, "logps/chosen": -256.99359130859375, "logps/rejected": -285.15380859375, "loss": 0.0941, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.14900550246238708, "rewards/margins": 7.018132209777832, "rewards/rejected": -6.8691277503967285, "step": 3850 }, { "epoch": 1.99, "learning_rate": 1.8655574679671067e-07, "logits/chosen": -2.773998737335205, "logits/rejected": -2.675830125808716, "logps/chosen": -249.72525024414062, "logps/rejected": -265.43524169921875, "loss": 0.0859, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5194703340530396, "rewards/margins": 10.313286781311035, "rewards/rejected": -8.793817520141602, "step": 3860 }, { "epoch": 2.0, "learning_rate": 1.8559954102122778e-07, "logits/chosen": -2.656508684158325, "logits/rejected": -2.6907639503479004, "logps/chosen": -237.22695922851562, "logps/rejected": -256.4031982421875, "loss": 0.0951, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.107354760169983, "rewards/margins": 7.762026786804199, "rewards/rejected": -6.654671669006348, "step": 3870 }, { "epoch": 2.0, "learning_rate": 1.8464333524574487e-07, "logits/chosen": -2.6225860118865967, "logits/rejected": -2.629585027694702, "logps/chosen": -197.79290771484375, "logps/rejected": -238.13485717773438, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 1.3901655673980713, "rewards/margins": 8.894407272338867, "rewards/rejected": -7.504241943359375, "step": 3880 }, { "epoch": 2.01, "learning_rate": 1.8368712947026199e-07, "logits/chosen": -2.7568469047546387, "logits/rejected": -2.792118549346924, "logps/chosen": -279.4228820800781, "logps/rejected": -296.11126708984375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 1.8633215427398682, "rewards/margins": 9.97962760925293, "rewards/rejected": -8.11630630493164, "step": 3890 }, { "epoch": 2.01, "learning_rate": 1.827309236947791e-07, "logits/chosen": -2.508894681930542, "logits/rejected": -2.627253293991089, "logps/chosen": -167.35305786132812, "logps/rejected": -272.1786193847656, "loss": 0.0206, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3741319179534912, "rewards/margins": 11.488214492797852, "rewards/rejected": -10.114082336425781, "step": 3900 }, { "epoch": 2.02, "learning_rate": 1.8177471791929622e-07, "logits/chosen": -2.562891960144043, "logits/rejected": -2.627436876296997, "logps/chosen": -219.225830078125, "logps/rejected": -294.67742919921875, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 1.6091563701629639, "rewards/margins": 11.577367782592773, "rewards/rejected": -9.968213081359863, "step": 3910 }, { "epoch": 2.02, "learning_rate": 1.8081851214381333e-07, "logits/chosen": -2.6254868507385254, "logits/rejected": -2.662242889404297, "logps/chosen": -272.97344970703125, "logps/rejected": -386.03802490234375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 1.5487847328186035, "rewards/margins": 13.016972541809082, "rewards/rejected": -11.468185424804688, "step": 3920 }, { "epoch": 2.03, "learning_rate": 1.7986230636833047e-07, "logits/chosen": -2.7052996158599854, "logits/rejected": -2.6561102867126465, "logps/chosen": -184.27590942382812, "logps/rejected": -234.31112670898438, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 0.5409653782844543, "rewards/margins": 8.709564208984375, "rewards/rejected": -8.168600082397461, "step": 3930 }, { "epoch": 2.03, "learning_rate": 1.789061005928476e-07, "logits/chosen": -2.7472193241119385, "logits/rejected": -2.7598049640655518, "logps/chosen": -289.13043212890625, "logps/rejected": -259.87261962890625, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 0.779617428779602, "rewards/margins": 9.105241775512695, "rewards/rejected": -8.325624465942383, "step": 3940 }, { "epoch": 2.04, "learning_rate": 1.7794989481736468e-07, "logits/chosen": -2.646831512451172, "logits/rejected": -2.6080241203308105, "logps/chosen": -234.96249389648438, "logps/rejected": -258.51947021484375, "loss": 0.0222, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3543825149536133, "rewards/margins": 10.9358549118042, "rewards/rejected": -9.581473350524902, "step": 3950 }, { "epoch": 2.04, "learning_rate": 1.769936890418818e-07, "logits/chosen": -2.6132800579071045, "logits/rejected": -2.6494579315185547, "logps/chosen": -269.6546325683594, "logps/rejected": -271.829833984375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 2.320793390274048, "rewards/margins": 10.98828411102295, "rewards/rejected": -8.667490005493164, "step": 3960 }, { "epoch": 2.05, "learning_rate": 1.760374832663989e-07, "logits/chosen": -2.6040878295898438, "logits/rejected": -2.5603089332580566, "logps/chosen": -298.0629577636719, "logps/rejected": -297.24237060546875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 3.069619655609131, "rewards/margins": 13.027959823608398, "rewards/rejected": -9.958338737487793, "step": 3970 }, { "epoch": 2.05, "learning_rate": 1.7508127749091603e-07, "logits/chosen": -2.7057785987854004, "logits/rejected": -2.7334468364715576, "logps/chosen": -260.33624267578125, "logps/rejected": -262.96954345703125, "loss": 0.0121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2088730335235596, "rewards/margins": 10.28221607208252, "rewards/rejected": -9.073343276977539, "step": 3980 }, { "epoch": 2.06, "learning_rate": 1.7412507171543314e-07, "logits/chosen": -2.7243123054504395, "logits/rejected": -2.814331293106079, "logps/chosen": -309.1561584472656, "logps/rejected": -260.5794677734375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 1.2261295318603516, "rewards/margins": 11.2640962600708, "rewards/rejected": -10.03796672821045, "step": 3990 }, { "epoch": 2.07, "learning_rate": 1.7316886593995028e-07, "logits/chosen": -2.729241371154785, "logits/rejected": -2.680410861968994, "logps/chosen": -240.87356567382812, "logps/rejected": -250.3045654296875, "loss": 0.0178, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6860274076461792, "rewards/margins": 9.379659652709961, "rewards/rejected": -8.693634033203125, "step": 4000 }, { "epoch": 2.07, "eval_logits/chosen": -2.7272632122039795, "eval_logits/rejected": -2.731018304824829, "eval_logps/chosen": -307.7316589355469, "eval_logps/rejected": -272.21661376953125, "eval_loss": 0.6797224283218384, "eval_rewards/accuracies": 0.761904776096344, "eval_rewards/chosen": -1.8484528064727783, "eval_rewards/margins": 3.40644907951355, "eval_rewards/rejected": -5.254901885986328, "eval_runtime": 218.0384, "eval_samples_per_second": 9.173, "eval_steps_per_second": 0.289, "step": 4000 }, { "epoch": 2.07, "learning_rate": 1.722126601644674e-07, "logits/chosen": -2.733309745788574, "logits/rejected": -2.81620717048645, "logps/chosen": -298.2601013183594, "logps/rejected": -286.43121337890625, "loss": 0.0125, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6399898529052734, "rewards/margins": 10.189987182617188, "rewards/rejected": -9.549996376037598, "step": 4010 }, { "epoch": 2.08, "learning_rate": 1.7125645438898452e-07, "logits/chosen": -2.754019021987915, "logits/rejected": -2.7992444038391113, "logps/chosen": -292.0843505859375, "logps/rejected": -278.53179931640625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 1.7986736297607422, "rewards/margins": 11.726593017578125, "rewards/rejected": -9.927919387817383, "step": 4020 }, { "epoch": 2.08, "learning_rate": 1.703002486135016e-07, "logits/chosen": -2.6090283393859863, "logits/rejected": -2.6274030208587646, "logps/chosen": -247.7549591064453, "logps/rejected": -275.93341064453125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 2.168842077255249, "rewards/margins": 13.396291732788086, "rewards/rejected": -11.227449417114258, "step": 4030 }, { "epoch": 2.09, "learning_rate": 1.6934404283801872e-07, "logits/chosen": -2.715796947479248, "logits/rejected": -2.5600688457489014, "logps/chosen": -269.89971923828125, "logps/rejected": -321.0479431152344, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 3.061984062194824, "rewards/margins": 16.50414276123047, "rewards/rejected": -13.442158699035645, "step": 4040 }, { "epoch": 2.09, "learning_rate": 1.6838783706253584e-07, "logits/chosen": -2.722234010696411, "logits/rejected": -2.648040294647217, "logps/chosen": -201.85948181152344, "logps/rejected": -215.13235473632812, "loss": 0.0357, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3728597164154053, "rewards/margins": 9.69245433807373, "rewards/rejected": -8.319594383239746, "step": 4050 }, { "epoch": 2.1, "learning_rate": 1.6743163128705295e-07, "logits/chosen": -2.7006664276123047, "logits/rejected": -2.738062858581543, "logps/chosen": -271.821533203125, "logps/rejected": -308.8503723144531, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 1.8506834506988525, "rewards/margins": 13.487665176391602, "rewards/rejected": -11.636982917785645, "step": 4060 }, { "epoch": 2.1, "learning_rate": 1.664754255115701e-07, "logits/chosen": -2.6540932655334473, "logits/rejected": -2.6025888919830322, "logps/chosen": -205.8916778564453, "logps/rejected": -249.5850067138672, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 2.721344470977783, "rewards/margins": 12.787909507751465, "rewards/rejected": -10.066566467285156, "step": 4070 }, { "epoch": 2.11, "learning_rate": 1.655192197360872e-07, "logits/chosen": -2.8125226497650146, "logits/rejected": -2.768751621246338, "logps/chosen": -287.2244873046875, "logps/rejected": -293.54669189453125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.948952853679657, "rewards/margins": 11.471153259277344, "rewards/rejected": -10.522199630737305, "step": 4080 }, { "epoch": 2.11, "learning_rate": 1.6456301396060433e-07, "logits/chosen": -2.6349759101867676, "logits/rejected": -2.7236363887786865, "logps/chosen": -278.74725341796875, "logps/rejected": -290.44500732421875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 1.579256296157837, "rewards/margins": 9.917515754699707, "rewards/rejected": -8.338258743286133, "step": 4090 }, { "epoch": 2.12, "learning_rate": 1.6360680818512144e-07, "logits/chosen": -2.8475401401519775, "logits/rejected": -2.7553558349609375, "logps/chosen": -308.8858337402344, "logps/rejected": -360.74920654296875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 1.8224369287490845, "rewards/margins": 11.919973373413086, "rewards/rejected": -10.097536087036133, "step": 4100 }, { "epoch": 2.12, "learning_rate": 1.6265060240963853e-07, "logits/chosen": -2.769975185394287, "logits/rejected": -2.719059467315674, "logps/chosen": -308.78369140625, "logps/rejected": -271.7587890625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 2.12917160987854, "rewards/margins": 12.400825500488281, "rewards/rejected": -10.27165412902832, "step": 4110 }, { "epoch": 2.13, "learning_rate": 1.6169439663415565e-07, "logits/chosen": -2.7416560649871826, "logits/rejected": -2.804804801940918, "logps/chosen": -285.85491943359375, "logps/rejected": -307.9002380371094, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 2.2552881240844727, "rewards/margins": 13.406854629516602, "rewards/rejected": -11.151565551757812, "step": 4120 }, { "epoch": 2.13, "learning_rate": 1.6073819085867276e-07, "logits/chosen": -2.8296265602111816, "logits/rejected": -2.805642604827881, "logps/chosen": -274.7913513183594, "logps/rejected": -383.2001647949219, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 2.098189353942871, "rewards/margins": 12.88170337677002, "rewards/rejected": -10.783514022827148, "step": 4130 }, { "epoch": 2.14, "learning_rate": 1.597819850831899e-07, "logits/chosen": -2.7971558570861816, "logits/rejected": -2.7784924507141113, "logps/chosen": -230.7209930419922, "logps/rejected": -262.358642578125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 2.9080710411071777, "rewards/margins": 13.876028060913086, "rewards/rejected": -10.96795654296875, "step": 4140 }, { "epoch": 2.14, "learning_rate": 1.5882577930770702e-07, "logits/chosen": -2.7370054721832275, "logits/rejected": -2.744947671890259, "logps/chosen": -256.64117431640625, "logps/rejected": -260.4443664550781, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 3.3169360160827637, "rewards/margins": 13.783903121948242, "rewards/rejected": -10.46696662902832, "step": 4150 }, { "epoch": 2.15, "learning_rate": 1.5786957353222414e-07, "logits/chosen": -2.7004518508911133, "logits/rejected": -2.7562923431396484, "logps/chosen": -271.4837341308594, "logps/rejected": -229.91641235351562, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 1.476590633392334, "rewards/margins": 10.639229774475098, "rewards/rejected": -9.162638664245605, "step": 4160 }, { "epoch": 2.15, "learning_rate": 1.5691336775674125e-07, "logits/chosen": -2.6011502742767334, "logits/rejected": -2.620133399963379, "logps/chosen": -261.44512939453125, "logps/rejected": -240.9309844970703, "loss": 0.0192, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5351409912109375, "rewards/margins": 11.039804458618164, "rewards/rejected": -9.504663467407227, "step": 4170 }, { "epoch": 2.16, "learning_rate": 1.5595716198125837e-07, "logits/chosen": -2.848597288131714, "logits/rejected": -2.875506639480591, "logps/chosen": -337.51287841796875, "logps/rejected": -337.42193603515625, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 0.44808346033096313, "rewards/margins": 11.492895126342773, "rewards/rejected": -11.044811248779297, "step": 4180 }, { "epoch": 2.16, "learning_rate": 1.5500095620577546e-07, "logits/chosen": -2.825258255004883, "logits/rejected": -2.8877339363098145, "logps/chosen": -259.0141296386719, "logps/rejected": -222.65609741210938, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 1.203387975692749, "rewards/margins": 9.037870407104492, "rewards/rejected": -7.8344831466674805, "step": 4190 }, { "epoch": 2.17, "learning_rate": 1.5404475043029257e-07, "logits/chosen": -2.7203478813171387, "logits/rejected": -2.6974122524261475, "logps/chosen": -261.0373840332031, "logps/rejected": -284.78057861328125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.7423670887947083, "rewards/margins": 10.860982894897461, "rewards/rejected": -10.118616104125977, "step": 4200 }, { "epoch": 2.17, "learning_rate": 1.5308854465480971e-07, "logits/chosen": -2.797438144683838, "logits/rejected": -2.789064407348633, "logps/chosen": -262.25006103515625, "logps/rejected": -250.6930694580078, "loss": 0.015, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2764910459518433, "rewards/margins": 9.582831382751465, "rewards/rejected": -8.306341171264648, "step": 4210 }, { "epoch": 2.18, "learning_rate": 1.5213233887932683e-07, "logits/chosen": -2.72556734085083, "logits/rejected": -2.7338197231292725, "logps/chosen": -296.2276916503906, "logps/rejected": -277.2362976074219, "loss": 0.0081, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.4097917079925537, "rewards/margins": 10.847259521484375, "rewards/rejected": -9.437466621398926, "step": 4220 }, { "epoch": 2.18, "learning_rate": 1.5117613310384395e-07, "logits/chosen": -2.7299015522003174, "logits/rejected": -2.768979549407959, "logps/chosen": -248.88082885742188, "logps/rejected": -251.6071319580078, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 1.7584762573242188, "rewards/margins": 11.731807708740234, "rewards/rejected": -9.973332405090332, "step": 4230 }, { "epoch": 2.19, "learning_rate": 1.5021992732836106e-07, "logits/chosen": -2.723501205444336, "logits/rejected": -2.798146963119507, "logps/chosen": -289.02252197265625, "logps/rejected": -341.7744445800781, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 3.187995195388794, "rewards/margins": 14.743619918823242, "rewards/rejected": -11.555627822875977, "step": 4240 }, { "epoch": 2.19, "learning_rate": 1.4926372155287818e-07, "logits/chosen": -2.8687949180603027, "logits/rejected": -2.7464382648468018, "logps/chosen": -214.82046508789062, "logps/rejected": -266.95269775390625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.9481528997421265, "rewards/margins": 11.698265075683594, "rewards/rejected": -10.750112533569336, "step": 4250 }, { "epoch": 2.2, "learning_rate": 1.483075157773953e-07, "logits/chosen": -2.793741464614868, "logits/rejected": -2.8271756172180176, "logps/chosen": -290.243896484375, "logps/rejected": -278.4471740722656, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.8705357313156128, "rewards/margins": 10.359903335571289, "rewards/rejected": -9.489367485046387, "step": 4260 }, { "epoch": 2.2, "learning_rate": 1.4735131000191238e-07, "logits/chosen": -2.677849531173706, "logits/rejected": -2.703662157058716, "logps/chosen": -260.99310302734375, "logps/rejected": -251.24911499023438, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 1.6553013324737549, "rewards/margins": 11.322183609008789, "rewards/rejected": -9.66688346862793, "step": 4270 }, { "epoch": 2.21, "learning_rate": 1.4639510422642952e-07, "logits/chosen": -2.82415509223938, "logits/rejected": -2.731492280960083, "logps/chosen": -256.83941650390625, "logps/rejected": -281.5820007324219, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.9518572092056274, "rewards/margins": 10.992048263549805, "rewards/rejected": -10.040190696716309, "step": 4280 }, { "epoch": 2.21, "learning_rate": 1.4543889845094664e-07, "logits/chosen": -2.6741342544555664, "logits/rejected": -2.625251054763794, "logps/chosen": -296.27880859375, "logps/rejected": -359.40643310546875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 2.28782057762146, "rewards/margins": 15.902807235717773, "rewards/rejected": -13.61498737335205, "step": 4290 }, { "epoch": 2.22, "learning_rate": 1.4448269267546376e-07, "logits/chosen": -2.65960693359375, "logits/rejected": -2.767089605331421, "logps/chosen": -283.67694091796875, "logps/rejected": -276.77972412109375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 2.666792869567871, "rewards/margins": 13.424444198608398, "rewards/rejected": -10.757650375366211, "step": 4300 }, { "epoch": 2.23, "learning_rate": 1.4352648689998087e-07, "logits/chosen": -2.7458882331848145, "logits/rejected": -2.788189649581909, "logps/chosen": -260.59552001953125, "logps/rejected": -238.883056640625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 2.0128729343414307, "rewards/margins": 13.033166885375977, "rewards/rejected": -11.020292282104492, "step": 4310 }, { "epoch": 2.23, "learning_rate": 1.42570281124498e-07, "logits/chosen": -2.6500084400177, "logits/rejected": -2.7445132732391357, "logps/chosen": -318.0313415527344, "logps/rejected": -330.5144958496094, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5414737462997437, "rewards/margins": 13.6046781539917, "rewards/rejected": -12.063204765319824, "step": 4320 }, { "epoch": 2.24, "learning_rate": 1.416140753490151e-07, "logits/chosen": -2.6413755416870117, "logits/rejected": -2.7113699913024902, "logps/chosen": -301.3822326660156, "logps/rejected": -330.09271240234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.6454537510871887, "rewards/margins": 11.528231620788574, "rewards/rejected": -10.882777214050293, "step": 4330 }, { "epoch": 2.24, "learning_rate": 1.4065786957353222e-07, "logits/chosen": -2.67690110206604, "logits/rejected": -2.6537270545959473, "logps/chosen": -276.92431640625, "logps/rejected": -315.29595947265625, "loss": 0.015, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7478466629981995, "rewards/margins": 11.373164176940918, "rewards/rejected": -10.625317573547363, "step": 4340 }, { "epoch": 2.25, "learning_rate": 1.3970166379804933e-07, "logits/chosen": -2.64355731010437, "logits/rejected": -2.569204807281494, "logps/chosen": -315.84100341796875, "logps/rejected": -309.93670654296875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 1.3451144695281982, "rewards/margins": 12.67324447631836, "rewards/rejected": -11.32812786102295, "step": 4350 }, { "epoch": 2.25, "learning_rate": 1.3874545802256645e-07, "logits/chosen": -2.6960456371307373, "logits/rejected": -2.6648309230804443, "logps/chosen": -263.03717041015625, "logps/rejected": -332.3580017089844, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 0.8603103756904602, "rewards/margins": 14.067952156066895, "rewards/rejected": -13.207639694213867, "step": 4360 }, { "epoch": 2.26, "learning_rate": 1.3778925224708357e-07, "logits/chosen": -2.547229528427124, "logits/rejected": -2.56522798538208, "logps/chosen": -233.1015625, "logps/rejected": -270.6748046875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 1.7012211084365845, "rewards/margins": 12.212430953979492, "rewards/rejected": -10.511209487915039, "step": 4370 }, { "epoch": 2.26, "learning_rate": 1.3683304647160068e-07, "logits/chosen": -2.627701759338379, "logits/rejected": -2.732863187789917, "logps/chosen": -249.3077392578125, "logps/rejected": -332.698486328125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 1.220199704170227, "rewards/margins": 13.386858940124512, "rewards/rejected": -12.166659355163574, "step": 4380 }, { "epoch": 2.27, "learning_rate": 1.358768406961178e-07, "logits/chosen": -2.6899096965789795, "logits/rejected": -2.6770434379577637, "logps/chosen": -243.5663299560547, "logps/rejected": -309.42755126953125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 1.4383031129837036, "rewards/margins": 13.10438346862793, "rewards/rejected": -11.666081428527832, "step": 4390 }, { "epoch": 2.27, "learning_rate": 1.349206349206349e-07, "logits/chosen": -2.7282745838165283, "logits/rejected": -2.6892740726470947, "logps/chosen": -263.52374267578125, "logps/rejected": -268.36956787109375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.8276527523994446, "rewards/margins": 9.619959831237793, "rewards/rejected": -10.447612762451172, "step": 4400 }, { "epoch": 2.28, "learning_rate": 1.3396442914515203e-07, "logits/chosen": -2.598597526550293, "logits/rejected": -2.6134636402130127, "logps/chosen": -249.866455078125, "logps/rejected": -296.13909912109375, "loss": 0.0139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.13881632685661316, "rewards/margins": 12.453330039978027, "rewards/rejected": -12.31451416015625, "step": 4410 }, { "epoch": 2.28, "learning_rate": 1.3300822336966917e-07, "logits/chosen": -2.62734055519104, "logits/rejected": -2.482203960418701, "logps/chosen": -280.33648681640625, "logps/rejected": -261.93939208984375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 2.6268467903137207, "rewards/margins": 15.163488388061523, "rewards/rejected": -12.536642074584961, "step": 4420 }, { "epoch": 2.29, "learning_rate": 1.3205201759418626e-07, "logits/chosen": -2.5897057056427, "logits/rejected": -2.541538953781128, "logps/chosen": -279.7352294921875, "logps/rejected": -272.1889953613281, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 1.6353164911270142, "rewards/margins": 11.685983657836914, "rewards/rejected": -10.050667762756348, "step": 4430 }, { "epoch": 2.29, "learning_rate": 1.3109581181870338e-07, "logits/chosen": -2.6762611865997314, "logits/rejected": -2.712613821029663, "logps/chosen": -289.64984130859375, "logps/rejected": -287.3264465332031, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 3.0438344478607178, "rewards/margins": 14.380327224731445, "rewards/rejected": -11.336492538452148, "step": 4440 }, { "epoch": 2.3, "learning_rate": 1.301396060432205e-07, "logits/chosen": -2.6527390480041504, "logits/rejected": -2.6962897777557373, "logps/chosen": -310.69927978515625, "logps/rejected": -291.29052734375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 2.072523593902588, "rewards/margins": 13.862905502319336, "rewards/rejected": -11.790382385253906, "step": 4450 }, { "epoch": 2.3, "learning_rate": 1.291834002677376e-07, "logits/chosen": -2.692753791809082, "logits/rejected": -2.6270699501037598, "logps/chosen": -252.09933471679688, "logps/rejected": -259.0921630859375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.35815539956092834, "rewards/margins": 10.08549976348877, "rewards/rejected": -9.72734546661377, "step": 4460 }, { "epoch": 2.31, "learning_rate": 1.2822719449225472e-07, "logits/chosen": -2.548062801361084, "logits/rejected": -2.685159206390381, "logps/chosen": -198.88003540039062, "logps/rejected": -219.5816192626953, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 0.3538029193878174, "rewards/margins": 9.92510986328125, "rewards/rejected": -9.571308135986328, "step": 4470 }, { "epoch": 2.31, "learning_rate": 1.2727098871677184e-07, "logits/chosen": -2.675671100616455, "logits/rejected": -2.6893503665924072, "logps/chosen": -323.32763671875, "logps/rejected": -297.06390380859375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 2.494044303894043, "rewards/margins": 12.427671432495117, "rewards/rejected": -9.933626174926758, "step": 4480 }, { "epoch": 2.32, "learning_rate": 1.2631478294128898e-07, "logits/chosen": -2.7433652877807617, "logits/rejected": -2.655972719192505, "logps/chosen": -300.86981201171875, "logps/rejected": -324.5699768066406, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 2.4431943893432617, "rewards/margins": 15.235920906066895, "rewards/rejected": -12.792726516723633, "step": 4490 }, { "epoch": 2.32, "learning_rate": 1.253585771658061e-07, "logits/chosen": -2.6331238746643066, "logits/rejected": -2.601410150527954, "logps/chosen": -251.07992553710938, "logps/rejected": -289.9195251464844, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.1148226261138916, "rewards/margins": 11.354089736938477, "rewards/rejected": -10.239266395568848, "step": 4500 }, { "epoch": 2.32, "eval_logits/chosen": -2.700108766555786, "eval_logits/rejected": -2.700565814971924, "eval_logps/chosen": -309.5376281738281, "eval_logps/rejected": -276.1910095214844, "eval_loss": 0.735942006111145, "eval_rewards/accuracies": 0.7817460298538208, "eval_rewards/chosen": -2.209642171859741, "eval_rewards/margins": 3.8401401042938232, "eval_rewards/rejected": -6.0497822761535645, "eval_runtime": 217.7058, "eval_samples_per_second": 9.187, "eval_steps_per_second": 0.289, "step": 4500 }, { "epoch": 2.33, "learning_rate": 1.2440237139032319e-07, "logits/chosen": -2.6619417667388916, "logits/rejected": -2.7171759605407715, "logps/chosen": -295.917236328125, "logps/rejected": -280.96728515625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 1.4065229892730713, "rewards/margins": 13.129376411437988, "rewards/rejected": -11.722851753234863, "step": 4510 }, { "epoch": 2.33, "learning_rate": 1.234461656148403e-07, "logits/chosen": -2.6914889812469482, "logits/rejected": -2.6381001472473145, "logps/chosen": -299.221923828125, "logps/rejected": -310.075439453125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 1.4773738384246826, "rewards/margins": 13.474810600280762, "rewards/rejected": -11.9974365234375, "step": 4520 }, { "epoch": 2.34, "learning_rate": 1.2248995983935742e-07, "logits/chosen": -2.6824350357055664, "logits/rejected": -2.6979966163635254, "logps/chosen": -300.82366943359375, "logps/rejected": -299.1722717285156, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 2.3239176273345947, "rewards/margins": 13.268686294555664, "rewards/rejected": -10.9447660446167, "step": 4530 }, { "epoch": 2.34, "learning_rate": 1.2153375406387456e-07, "logits/chosen": -2.6138525009155273, "logits/rejected": -2.7358133792877197, "logps/chosen": -329.01947021484375, "logps/rejected": -350.1726379394531, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 2.126481056213379, "rewards/margins": 14.977691650390625, "rewards/rejected": -12.85120964050293, "step": 4540 }, { "epoch": 2.35, "learning_rate": 1.2057754828839165e-07, "logits/chosen": -2.690929889678955, "logits/rejected": -2.8147425651550293, "logps/chosen": -267.2896728515625, "logps/rejected": -314.25030517578125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 1.8327617645263672, "rewards/margins": 11.646089553833008, "rewards/rejected": -9.81332778930664, "step": 4550 }, { "epoch": 2.35, "learning_rate": 1.1962134251290876e-07, "logits/chosen": -2.581512928009033, "logits/rejected": -2.6514651775360107, "logps/chosen": -244.9253692626953, "logps/rejected": -206.70034790039062, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.3540361523628235, "rewards/margins": 10.505193710327148, "rewards/rejected": -10.85922908782959, "step": 4560 }, { "epoch": 2.36, "learning_rate": 1.1866513673742588e-07, "logits/chosen": -2.720597267150879, "logits/rejected": -2.6273646354675293, "logps/chosen": -262.72467041015625, "logps/rejected": -277.3870544433594, "loss": 0.0442, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.38703417778015137, "rewards/margins": 10.342449188232422, "rewards/rejected": -9.955414772033691, "step": 4570 }, { "epoch": 2.36, "learning_rate": 1.1770893096194301e-07, "logits/chosen": -2.683189868927002, "logits/rejected": -2.6948843002319336, "logps/chosen": -320.70184326171875, "logps/rejected": -330.626953125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 3.077538013458252, "rewards/margins": 15.482243537902832, "rewards/rejected": -12.404706954956055, "step": 4580 }, { "epoch": 2.37, "learning_rate": 1.1675272518646012e-07, "logits/chosen": -2.7156405448913574, "logits/rejected": -2.637774705886841, "logps/chosen": -258.29266357421875, "logps/rejected": -310.4388732910156, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": 3.1347460746765137, "rewards/margins": 14.748605728149414, "rewards/rejected": -11.613859176635742, "step": 4590 }, { "epoch": 2.37, "learning_rate": 1.1579651941097724e-07, "logits/chosen": -2.6939098834991455, "logits/rejected": -2.7423393726348877, "logps/chosen": -299.42889404296875, "logps/rejected": -245.23208618164062, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.775151789188385, "rewards/margins": 11.489202499389648, "rewards/rejected": -10.714052200317383, "step": 4600 }, { "epoch": 2.38, "learning_rate": 1.1484031363549436e-07, "logits/chosen": -2.6191563606262207, "logits/rejected": -2.6700427532196045, "logps/chosen": -314.7420349121094, "logps/rejected": -322.2962951660156, "loss": 0.0187, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.6653627157211304, "rewards/margins": 14.713505744934082, "rewards/rejected": -14.048141479492188, "step": 4610 }, { "epoch": 2.39, "learning_rate": 1.1388410786001147e-07, "logits/chosen": -2.7032155990600586, "logits/rejected": -2.714507579803467, "logps/chosen": -241.12741088867188, "logps/rejected": -308.56304931640625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 1.174452543258667, "rewards/margins": 13.297747611999512, "rewards/rejected": -12.123295783996582, "step": 4620 }, { "epoch": 2.39, "learning_rate": 1.1292790208452859e-07, "logits/chosen": -2.6625142097473145, "logits/rejected": -2.6771183013916016, "logps/chosen": -240.2090301513672, "logps/rejected": -263.73797607421875, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 0.6283326745033264, "rewards/margins": 13.485211372375488, "rewards/rejected": -12.856880187988281, "step": 4630 }, { "epoch": 2.4, "learning_rate": 1.119716963090457e-07, "logits/chosen": -2.555988311767578, "logits/rejected": -2.58622407913208, "logps/chosen": -210.8300018310547, "logps/rejected": -265.80462646484375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.8450849652290344, "rewards/margins": 11.602185249328613, "rewards/rejected": -10.757101058959961, "step": 4640 }, { "epoch": 2.4, "learning_rate": 1.1101549053356282e-07, "logits/chosen": -2.7897839546203613, "logits/rejected": -2.7569496631622314, "logps/chosen": -279.80267333984375, "logps/rejected": -290.2506103515625, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 0.9652635455131531, "rewards/margins": 11.88095760345459, "rewards/rejected": -10.915693283081055, "step": 4650 }, { "epoch": 2.41, "learning_rate": 1.1005928475807993e-07, "logits/chosen": -2.706782579421997, "logits/rejected": -2.690147876739502, "logps/chosen": -263.908935546875, "logps/rejected": -268.34808349609375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.13803401589393616, "rewards/margins": 11.163463592529297, "rewards/rejected": -11.301496505737305, "step": 4660 }, { "epoch": 2.41, "learning_rate": 1.0910307898259705e-07, "logits/chosen": -2.5569040775299072, "logits/rejected": -2.5663299560546875, "logps/chosen": -228.5583038330078, "logps/rejected": -284.5388488769531, "loss": 0.0164, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3399560451507568, "rewards/margins": 12.518512725830078, "rewards/rejected": -11.178556442260742, "step": 4670 }, { "epoch": 2.42, "learning_rate": 1.0814687320711418e-07, "logits/chosen": -2.6800427436828613, "logits/rejected": -2.5935287475585938, "logps/chosen": -198.6033477783203, "logps/rejected": -292.9828796386719, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.3410785496234894, "rewards/margins": 13.070027351379395, "rewards/rejected": -12.728948593139648, "step": 4680 }, { "epoch": 2.42, "learning_rate": 1.0719066743163128e-07, "logits/chosen": -2.589458465576172, "logits/rejected": -2.6344993114471436, "logps/chosen": -291.27984619140625, "logps/rejected": -293.70477294921875, "loss": 0.0146, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.0233111381530762, "rewards/margins": 14.37419605255127, "rewards/rejected": -13.350885391235352, "step": 4690 }, { "epoch": 2.43, "learning_rate": 1.062344616561484e-07, "logits/chosen": -2.659127950668335, "logits/rejected": -2.737194299697876, "logps/chosen": -317.55108642578125, "logps/rejected": -367.94293212890625, "loss": 0.0109, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.5995655059814453, "rewards/margins": 18.63315200805664, "rewards/rejected": -15.033584594726562, "step": 4700 }, { "epoch": 2.43, "learning_rate": 1.0527825588066551e-07, "logits/chosen": -2.648740530014038, "logits/rejected": -2.6491146087646484, "logps/chosen": -304.24017333984375, "logps/rejected": -293.5978088378906, "loss": 0.0087, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.9029955863952637, "rewards/margins": 16.679758071899414, "rewards/rejected": -13.776761054992676, "step": 4710 }, { "epoch": 2.44, "learning_rate": 1.0432205010518264e-07, "logits/chosen": -2.699047088623047, "logits/rejected": -2.7981960773468018, "logps/chosen": -233.66970825195312, "logps/rejected": -306.4530944824219, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 1.6058984994888306, "rewards/margins": 14.446258544921875, "rewards/rejected": -12.840359687805176, "step": 4720 }, { "epoch": 2.44, "learning_rate": 1.0336584432969974e-07, "logits/chosen": -2.567579746246338, "logits/rejected": -2.5516488552093506, "logps/chosen": -252.8605499267578, "logps/rejected": -278.5452575683594, "loss": 0.011, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.2912280559539795, "rewards/margins": 14.698877334594727, "rewards/rejected": -12.407648086547852, "step": 4730 }, { "epoch": 2.45, "learning_rate": 1.0240963855421686e-07, "logits/chosen": -2.592355728149414, "logits/rejected": -2.6269283294677734, "logps/chosen": -322.3033142089844, "logps/rejected": -310.4656982421875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 1.6497825384140015, "rewards/margins": 13.311511039733887, "rewards/rejected": -11.66172981262207, "step": 4740 }, { "epoch": 2.45, "learning_rate": 1.0145343277873399e-07, "logits/chosen": -2.652545213699341, "logits/rejected": -2.7091572284698486, "logps/chosen": -293.6610107421875, "logps/rejected": -328.0738830566406, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 1.4136769771575928, "rewards/margins": 13.760273933410645, "rewards/rejected": -12.346597671508789, "step": 4750 }, { "epoch": 2.46, "learning_rate": 1.004972270032511e-07, "logits/chosen": -2.4025778770446777, "logits/rejected": -2.369227647781372, "logps/chosen": -224.7193603515625, "logps/rejected": -253.95285034179688, "loss": 0.0146, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.4258928298950195, "rewards/margins": 13.243026733398438, "rewards/rejected": -11.817132949829102, "step": 4760 }, { "epoch": 2.46, "learning_rate": 9.95410212277682e-08, "logits/chosen": -2.6297268867492676, "logits/rejected": -2.6853363513946533, "logps/chosen": -287.7942199707031, "logps/rejected": -280.2574768066406, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": 1.281556248664856, "rewards/margins": 12.799880027770996, "rewards/rejected": -11.518324851989746, "step": 4770 }, { "epoch": 2.47, "learning_rate": 9.858481545228532e-08, "logits/chosen": -2.6263976097106934, "logits/rejected": -2.6547935009002686, "logps/chosen": -253.96615600585938, "logps/rejected": -236.6396026611328, "loss": 0.0236, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.941236138343811, "rewards/margins": 12.551959991455078, "rewards/rejected": -10.610723495483398, "step": 4780 }, { "epoch": 2.47, "learning_rate": 9.762860967680245e-08, "logits/chosen": -2.648315906524658, "logits/rejected": -2.6408438682556152, "logps/chosen": -217.11599731445312, "logps/rejected": -267.97222900390625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.5781963467597961, "rewards/margins": 11.57778549194336, "rewards/rejected": -10.999589920043945, "step": 4790 }, { "epoch": 2.48, "learning_rate": 9.667240390131957e-08, "logits/chosen": -2.711276054382324, "logits/rejected": -2.798565626144409, "logps/chosen": -260.7125549316406, "logps/rejected": -253.3577117919922, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 1.451697587966919, "rewards/margins": 14.93236255645752, "rewards/rejected": -13.480664253234863, "step": 4800 }, { "epoch": 2.48, "learning_rate": 9.571619812583667e-08, "logits/chosen": -2.5804848670959473, "logits/rejected": -2.550412178039551, "logps/chosen": -296.70941162109375, "logps/rejected": -289.5600891113281, "loss": 0.0135, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.216889500617981, "rewards/margins": 12.942187309265137, "rewards/rejected": -11.725297927856445, "step": 4810 }, { "epoch": 2.49, "learning_rate": 9.47599923503538e-08, "logits/chosen": -2.656806230545044, "logits/rejected": -2.7578437328338623, "logps/chosen": -273.0238342285156, "logps/rejected": -310.3895568847656, "loss": 0.0229, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.7180588245391846, "rewards/margins": 13.919692993164062, "rewards/rejected": -12.201634407043457, "step": 4820 }, { "epoch": 2.49, "learning_rate": 9.380378657487091e-08, "logits/chosen": -2.611632823944092, "logits/rejected": -2.606079339981079, "logps/chosen": -236.2656707763672, "logps/rejected": -278.8091735839844, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 2.171788215637207, "rewards/margins": 15.047994613647461, "rewards/rejected": -12.876205444335938, "step": 4830 }, { "epoch": 2.5, "learning_rate": 9.284758079938803e-08, "logits/chosen": -2.752904176712036, "logits/rejected": -2.702975034713745, "logps/chosen": -300.28704833984375, "logps/rejected": -323.9290466308594, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 1.8785560131072998, "rewards/margins": 14.25035572052002, "rewards/rejected": -12.371798515319824, "step": 4840 }, { "epoch": 2.5, "learning_rate": 9.189137502390513e-08, "logits/chosen": -2.719749927520752, "logits/rejected": -2.782289981842041, "logps/chosen": -267.34246826171875, "logps/rejected": -312.0362854003906, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 1.0956392288208008, "rewards/margins": 13.80529499053955, "rewards/rejected": -12.70965576171875, "step": 4850 }, { "epoch": 2.51, "learning_rate": 9.093516924842226e-08, "logits/chosen": -2.7262961864471436, "logits/rejected": -2.68577241897583, "logps/chosen": -218.84176635742188, "logps/rejected": -276.78289794921875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 2.0724873542785645, "rewards/margins": 13.172772407531738, "rewards/rejected": -11.100284576416016, "step": 4860 }, { "epoch": 2.51, "learning_rate": 8.997896347293938e-08, "logits/chosen": -2.718925952911377, "logits/rejected": -2.708313226699829, "logps/chosen": -245.9953155517578, "logps/rejected": -305.19354248046875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 1.8387666940689087, "rewards/margins": 12.512483596801758, "rewards/rejected": -10.67371654510498, "step": 4870 }, { "epoch": 2.52, "learning_rate": 8.902275769745648e-08, "logits/chosen": -2.6192898750305176, "logits/rejected": -2.722072124481201, "logps/chosen": -224.29110717773438, "logps/rejected": -262.021240234375, "loss": 0.0107, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.255924940109253, "rewards/margins": 12.77586841583252, "rewards/rejected": -11.519944190979004, "step": 4880 }, { "epoch": 2.52, "learning_rate": 8.806655192197361e-08, "logits/chosen": -2.67940616607666, "logits/rejected": -2.7205865383148193, "logps/chosen": -225.5251922607422, "logps/rejected": -312.525390625, "loss": 0.0083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.921847939491272, "rewards/margins": 16.122520446777344, "rewards/rejected": -14.20067310333252, "step": 4890 }, { "epoch": 2.53, "learning_rate": 8.711034614649072e-08, "logits/chosen": -2.7915446758270264, "logits/rejected": -2.7987782955169678, "logps/chosen": -264.7381896972656, "logps/rejected": -313.4402770996094, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 2.471101999282837, "rewards/margins": 16.768362045288086, "rewards/rejected": -14.297264099121094, "step": 4900 }, { "epoch": 2.53, "learning_rate": 8.615414037100784e-08, "logits/chosen": -2.678217887878418, "logits/rejected": -2.6336052417755127, "logps/chosen": -332.27801513671875, "logps/rejected": -323.1417541503906, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 0.799132227897644, "rewards/margins": 12.401124000549316, "rewards/rejected": -11.601991653442383, "step": 4910 }, { "epoch": 2.54, "learning_rate": 8.519793459552494e-08, "logits/chosen": -2.5783820152282715, "logits/rejected": -2.601856231689453, "logps/chosen": -275.9147033691406, "logps/rejected": -285.7947082519531, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": 1.3343265056610107, "rewards/margins": 12.40494441986084, "rewards/rejected": -11.070618629455566, "step": 4920 }, { "epoch": 2.55, "learning_rate": 8.424172882004207e-08, "logits/chosen": -2.7203049659729004, "logits/rejected": -2.773594856262207, "logps/chosen": -298.14984130859375, "logps/rejected": -309.88861083984375, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": 1.4647815227508545, "rewards/margins": 12.672274589538574, "rewards/rejected": -11.207493782043457, "step": 4930 }, { "epoch": 2.55, "learning_rate": 8.328552304455919e-08, "logits/chosen": -2.603013515472412, "logits/rejected": -2.6129584312438965, "logps/chosen": -202.2315216064453, "logps/rejected": -246.1285400390625, "loss": 0.0132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.493094265460968, "rewards/margins": 10.736700057983398, "rewards/rejected": -10.243606567382812, "step": 4940 }, { "epoch": 2.56, "learning_rate": 8.23293172690763e-08, "logits/chosen": -2.5959506034851074, "logits/rejected": -2.761260509490967, "logps/chosen": -240.5106964111328, "logps/rejected": -269.2008361816406, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.8836824297904968, "rewards/margins": 13.944262504577637, "rewards/rejected": -13.060582160949707, "step": 4950 }, { "epoch": 2.56, "learning_rate": 8.137311149359343e-08, "logits/chosen": -2.8082408905029297, "logits/rejected": -2.8189988136291504, "logps/chosen": -332.5243225097656, "logps/rejected": -314.81414794921875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 2.6978859901428223, "rewards/margins": 15.818052291870117, "rewards/rejected": -13.120168685913086, "step": 4960 }, { "epoch": 2.57, "learning_rate": 8.041690571811053e-08, "logits/chosen": -2.731813907623291, "logits/rejected": -2.7160086631774902, "logps/chosen": -273.2561950683594, "logps/rejected": -297.5777587890625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 1.1517640352249146, "rewards/margins": 15.763650894165039, "rewards/rejected": -14.611885070800781, "step": 4970 }, { "epoch": 2.57, "learning_rate": 7.946069994262765e-08, "logits/chosen": -2.634385347366333, "logits/rejected": -2.653733253479004, "logps/chosen": -249.900634765625, "logps/rejected": -238.01327514648438, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 1.5609699487686157, "rewards/margins": 12.663952827453613, "rewards/rejected": -11.102984428405762, "step": 4980 }, { "epoch": 2.58, "learning_rate": 7.850449416714476e-08, "logits/chosen": -2.6939308643341064, "logits/rejected": -2.8180503845214844, "logps/chosen": -339.6492614746094, "logps/rejected": -327.8633728027344, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 2.133561134338379, "rewards/margins": 13.298667907714844, "rewards/rejected": -11.165107727050781, "step": 4990 }, { "epoch": 2.58, "learning_rate": 7.754828839166188e-08, "logits/chosen": -2.720078229904175, "logits/rejected": -2.694797992706299, "logps/chosen": -274.102294921875, "logps/rejected": -277.6142578125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.9390266537666321, "rewards/margins": 11.481155395507812, "rewards/rejected": -10.542128562927246, "step": 5000 }, { "epoch": 2.58, "eval_logits/chosen": -2.7196364402770996, "eval_logits/rejected": -2.718513250350952, "eval_logps/chosen": -312.903564453125, "eval_logps/rejected": -280.2130126953125, "eval_loss": 0.7864260673522949, "eval_rewards/accuracies": 0.773809552192688, "eval_rewards/chosen": -2.882828950881958, "eval_rewards/margins": 3.971348762512207, "eval_rewards/rejected": -6.854177951812744, "eval_runtime": 217.5511, "eval_samples_per_second": 9.193, "eval_steps_per_second": 0.29, "step": 5000 }, { "epoch": 2.59, "learning_rate": 7.6592082616179e-08, "logits/chosen": -2.6231255531311035, "logits/rejected": -2.626267910003662, "logps/chosen": -235.2042236328125, "logps/rejected": -297.21063232421875, "loss": 0.0098, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4571438431739807, "rewards/margins": 15.142412185668945, "rewards/rejected": -14.685267448425293, "step": 5010 }, { "epoch": 2.59, "learning_rate": 7.563587684069611e-08, "logits/chosen": -2.5229032039642334, "logits/rejected": -2.6841416358947754, "logps/chosen": -258.5490417480469, "logps/rejected": -363.48162841796875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 2.008655309677124, "rewards/margins": 15.509332656860352, "rewards/rejected": -13.500676155090332, "step": 5020 }, { "epoch": 2.6, "learning_rate": 7.467967106521324e-08, "logits/chosen": -2.6904513835906982, "logits/rejected": -2.694772243499756, "logps/chosen": -229.6025390625, "logps/rejected": -202.74024963378906, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 2.2432830333709717, "rewards/margins": 12.049966812133789, "rewards/rejected": -9.806684494018555, "step": 5030 }, { "epoch": 2.6, "learning_rate": 7.372346528973034e-08, "logits/chosen": -2.68023419380188, "logits/rejected": -2.7025341987609863, "logps/chosen": -220.0260772705078, "logps/rejected": -261.80352783203125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 0.38927072286605835, "rewards/margins": 11.34105110168457, "rewards/rejected": -10.951781272888184, "step": 5040 }, { "epoch": 2.61, "learning_rate": 7.276725951424746e-08, "logits/chosen": -2.738955020904541, "logits/rejected": -2.659353494644165, "logps/chosen": -230.02413940429688, "logps/rejected": -324.76116943359375, "loss": 0.0135, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.024852514266967773, "rewards/margins": 12.52303409576416, "rewards/rejected": -12.547883987426758, "step": 5050 }, { "epoch": 2.61, "learning_rate": 7.181105373876457e-08, "logits/chosen": -2.627469539642334, "logits/rejected": -2.6676948070526123, "logps/chosen": -260.1104431152344, "logps/rejected": -266.2936096191406, "loss": 0.0137, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9252920150756836, "rewards/margins": 12.830589294433594, "rewards/rejected": -11.905298233032227, "step": 5060 }, { "epoch": 2.62, "learning_rate": 7.08548479632817e-08, "logits/chosen": -2.784240484237671, "logits/rejected": -2.735193967819214, "logps/chosen": -369.58270263671875, "logps/rejected": -323.7354736328125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 1.6716524362564087, "rewards/margins": 12.856073379516602, "rewards/rejected": -11.184419631958008, "step": 5070 }, { "epoch": 2.62, "learning_rate": 6.98986421877988e-08, "logits/chosen": -2.740915298461914, "logits/rejected": -2.7226786613464355, "logps/chosen": -340.28802490234375, "logps/rejected": -355.4793395996094, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.994368851184845, "rewards/margins": 13.248014450073242, "rewards/rejected": -12.253645896911621, "step": 5080 }, { "epoch": 2.63, "learning_rate": 6.894243641231592e-08, "logits/chosen": -2.6611123085021973, "logits/rejected": -2.627011299133301, "logps/chosen": -255.84457397460938, "logps/rejected": -327.6121826171875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 1.7034765481948853, "rewards/margins": 16.250394821166992, "rewards/rejected": -14.546918869018555, "step": 5090 }, { "epoch": 2.63, "learning_rate": 6.798623063683305e-08, "logits/chosen": -2.6290283203125, "logits/rejected": -2.695361852645874, "logps/chosen": -279.5427551269531, "logps/rejected": -333.14532470703125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.6256012320518494, "rewards/margins": 16.523670196533203, "rewards/rejected": -15.89806842803955, "step": 5100 }, { "epoch": 2.64, "learning_rate": 6.703002486135017e-08, "logits/chosen": -2.5708651542663574, "logits/rejected": -2.6656506061553955, "logps/chosen": -249.72073364257812, "logps/rejected": -296.4393005371094, "loss": 0.0139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.8271173238754272, "rewards/margins": 16.879287719726562, "rewards/rejected": -15.052169799804688, "step": 5110 }, { "epoch": 2.64, "learning_rate": 6.607381908586727e-08, "logits/chosen": -2.7096810340881348, "logits/rejected": -2.700406074523926, "logps/chosen": -238.05990600585938, "logps/rejected": -302.31378173828125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.9860852956771851, "rewards/margins": 14.121419906616211, "rewards/rejected": -13.135335922241211, "step": 5120 }, { "epoch": 2.65, "learning_rate": 6.511761331038438e-08, "logits/chosen": -2.774763584136963, "logits/rejected": -2.795804500579834, "logps/chosen": -236.27658081054688, "logps/rejected": -272.2627868652344, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.472693681716919, "rewards/margins": 9.923027038574219, "rewards/rejected": -11.395719528198242, "step": 5130 }, { "epoch": 2.65, "learning_rate": 6.416140753490151e-08, "logits/chosen": -2.7146975994110107, "logits/rejected": -2.7896509170532227, "logps/chosen": -368.1150817871094, "logps/rejected": -340.7099609375, "loss": 0.0099, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3701469898223877, "rewards/margins": 13.160669326782227, "rewards/rejected": -11.790521621704102, "step": 5140 }, { "epoch": 2.66, "learning_rate": 6.320520175941863e-08, "logits/chosen": -2.5971240997314453, "logits/rejected": -2.595491647720337, "logps/chosen": -248.2359619140625, "logps/rejected": -278.7591857910156, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 1.151533603668213, "rewards/margins": 13.753721237182617, "rewards/rejected": -12.602187156677246, "step": 5150 }, { "epoch": 2.66, "learning_rate": 6.224899598393573e-08, "logits/chosen": -2.7201809883117676, "logits/rejected": -2.751149892807007, "logps/chosen": -292.6256103515625, "logps/rejected": -320.6335754394531, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 0.5118240714073181, "rewards/margins": 13.12120532989502, "rewards/rejected": -12.609381675720215, "step": 5160 }, { "epoch": 2.67, "learning_rate": 6.129279020845286e-08, "logits/chosen": -2.7117671966552734, "logits/rejected": -2.8060145378112793, "logps/chosen": -262.6538391113281, "logps/rejected": -248.8769989013672, "loss": 0.0102, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.5282812118530273, "rewards/margins": 12.767044067382812, "rewards/rejected": -11.238761901855469, "step": 5170 }, { "epoch": 2.67, "learning_rate": 6.033658443296998e-08, "logits/chosen": -2.627601146697998, "logits/rejected": -2.671121120452881, "logps/chosen": -203.81683349609375, "logps/rejected": -290.76055908203125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 1.5020616054534912, "rewards/margins": 14.89185619354248, "rewards/rejected": -13.389793395996094, "step": 5180 }, { "epoch": 2.68, "learning_rate": 5.9380378657487085e-08, "logits/chosen": -2.7020039558410645, "logits/rejected": -2.7345330715179443, "logps/chosen": -323.268310546875, "logps/rejected": -310.77606201171875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 1.3893721103668213, "rewards/margins": 14.052579879760742, "rewards/rejected": -12.663207054138184, "step": 5190 }, { "epoch": 2.68, "learning_rate": 5.842417288200421e-08, "logits/chosen": -2.6971147060394287, "logits/rejected": -2.7128520011901855, "logps/chosen": -312.43505859375, "logps/rejected": -313.35479736328125, "loss": 0.0103, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.1101088523864746, "rewards/margins": 14.328615188598633, "rewards/rejected": -12.218506813049316, "step": 5200 }, { "epoch": 2.69, "learning_rate": 5.7467967106521317e-08, "logits/chosen": -2.677502155303955, "logits/rejected": -2.684664726257324, "logps/chosen": -221.14669799804688, "logps/rejected": -336.1509704589844, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 2.3738515377044678, "rewards/margins": 18.37417221069336, "rewards/rejected": -16.000322341918945, "step": 5210 }, { "epoch": 2.69, "learning_rate": 5.651176133103844e-08, "logits/chosen": -2.7950339317321777, "logits/rejected": -2.7664475440979004, "logps/chosen": -280.81549072265625, "logps/rejected": -316.634521484375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 1.932924509048462, "rewards/margins": 14.996597290039062, "rewards/rejected": -13.063672065734863, "step": 5220 }, { "epoch": 2.7, "learning_rate": 5.555555555555555e-08, "logits/chosen": -2.6718757152557373, "logits/rejected": -2.7057576179504395, "logps/chosen": -278.4066162109375, "logps/rejected": -269.1131896972656, "loss": 0.0068, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.11279463768005371, "rewards/margins": 12.314489364624023, "rewards/rejected": -12.201693534851074, "step": 5230 }, { "epoch": 2.71, "learning_rate": 5.459934978007267e-08, "logits/chosen": -2.7089667320251465, "logits/rejected": -2.8040003776550293, "logps/chosen": -277.17437744140625, "logps/rejected": -312.9452209472656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 1.6362769603729248, "rewards/margins": 15.224557876586914, "rewards/rejected": -13.588282585144043, "step": 5240 }, { "epoch": 2.71, "learning_rate": 5.3643144004589786e-08, "logits/chosen": -2.6221039295196533, "logits/rejected": -2.5532572269439697, "logps/chosen": -252.72628784179688, "logps/rejected": -297.381103515625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 3.109865665435791, "rewards/margins": 16.99216079711914, "rewards/rejected": -13.882293701171875, "step": 5250 }, { "epoch": 2.72, "learning_rate": 5.26869382291069e-08, "logits/chosen": -2.7333858013153076, "logits/rejected": -2.7665257453918457, "logps/chosen": -237.46041870117188, "logps/rejected": -275.907470703125, "loss": 0.0222, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.37319669127464294, "rewards/margins": 11.739015579223633, "rewards/rejected": -11.365819931030273, "step": 5260 }, { "epoch": 2.72, "learning_rate": 5.173073245362402e-08, "logits/chosen": -2.548161745071411, "logits/rejected": -2.7022764682769775, "logps/chosen": -240.03506469726562, "logps/rejected": -316.1889953613281, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.8135687112808228, "rewards/margins": 14.389727592468262, "rewards/rejected": -13.57615852355957, "step": 5270 }, { "epoch": 2.73, "learning_rate": 5.077452667814113e-08, "logits/chosen": -2.6491031646728516, "logits/rejected": -2.6999027729034424, "logps/chosen": -220.13034057617188, "logps/rejected": -269.79119873046875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7047170400619507, "rewards/margins": 13.21327018737793, "rewards/rejected": -13.917986869812012, "step": 5280 }, { "epoch": 2.73, "learning_rate": 4.981832090265825e-08, "logits/chosen": -2.7604708671569824, "logits/rejected": -2.7237050533294678, "logps/chosen": -268.9174499511719, "logps/rejected": -308.0942687988281, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 2.1735785007476807, "rewards/margins": 16.48755645751953, "rewards/rejected": -14.313977241516113, "step": 5290 }, { "epoch": 2.74, "learning_rate": 4.8862115127175364e-08, "logits/chosen": -2.686573028564453, "logits/rejected": -2.807529926300049, "logps/chosen": -309.7162170410156, "logps/rejected": -307.1707458496094, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 1.2362159490585327, "rewards/margins": 12.712292671203613, "rewards/rejected": -11.476076126098633, "step": 5300 }, { "epoch": 2.74, "learning_rate": 4.790590935169248e-08, "logits/chosen": -2.7090699672698975, "logits/rejected": -2.5909218788146973, "logps/chosen": -244.21267700195312, "logps/rejected": -422.15533447265625, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 0.389432430267334, "rewards/margins": 15.106956481933594, "rewards/rejected": -14.717523574829102, "step": 5310 }, { "epoch": 2.75, "learning_rate": 4.69497035762096e-08, "logits/chosen": -2.621753454208374, "logits/rejected": -2.606581211090088, "logps/chosen": -291.43316650390625, "logps/rejected": -347.6919860839844, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.3517930507659912, "rewards/margins": 13.509759902954102, "rewards/rejected": -14.861552238464355, "step": 5320 }, { "epoch": 2.75, "learning_rate": 4.599349780072671e-08, "logits/chosen": -2.6323933601379395, "logits/rejected": -2.65653133392334, "logps/chosen": -311.70855712890625, "logps/rejected": -272.53302001953125, "loss": 0.0142, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.12222401052713394, "rewards/margins": 12.896278381347656, "rewards/rejected": -13.018501281738281, "step": 5330 }, { "epoch": 2.76, "learning_rate": 4.5037292025243834e-08, "logits/chosen": -2.6000657081604004, "logits/rejected": -2.6205499172210693, "logps/chosen": -250.4822235107422, "logps/rejected": -320.0833740234375, "loss": 0.0072, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5791562795639038, "rewards/margins": 13.950708389282227, "rewards/rejected": -13.371553421020508, "step": 5340 }, { "epoch": 2.76, "learning_rate": 4.408108624976094e-08, "logits/chosen": -2.833369016647339, "logits/rejected": -2.6747612953186035, "logps/chosen": -281.80267333984375, "logps/rejected": -340.23828125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0539871454238892, "rewards/margins": 16.2853946685791, "rewards/rejected": -15.231409072875977, "step": 5350 }, { "epoch": 2.77, "learning_rate": 4.3124880474278065e-08, "logits/chosen": -2.8220465183258057, "logits/rejected": -2.790334701538086, "logps/chosen": -293.7169494628906, "logps/rejected": -277.62127685546875, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 1.2055577039718628, "rewards/margins": 13.435251235961914, "rewards/rejected": -12.229693412780762, "step": 5360 }, { "epoch": 2.77, "learning_rate": 4.2168674698795174e-08, "logits/chosen": -2.7694520950317383, "logits/rejected": -2.808863878250122, "logps/chosen": -226.52627563476562, "logps/rejected": -304.43389892578125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.23372545838356018, "rewards/margins": 13.936861991882324, "rewards/rejected": -13.70313549041748, "step": 5370 }, { "epoch": 2.78, "learning_rate": 4.1212468923312296e-08, "logits/chosen": -2.6227002143859863, "logits/rejected": -2.589164972305298, "logps/chosen": -232.10678100585938, "logps/rejected": -308.0304870605469, "loss": 0.0116, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9900895357131958, "rewards/margins": 11.820328712463379, "rewards/rejected": -12.810419082641602, "step": 5380 }, { "epoch": 2.78, "learning_rate": 4.025626314782941e-08, "logits/chosen": -2.654756784439087, "logits/rejected": -2.7141530513763428, "logps/chosen": -248.1104736328125, "logps/rejected": -272.46295166015625, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 2.5592339038848877, "rewards/margins": 15.954304695129395, "rewards/rejected": -13.395071029663086, "step": 5390 }, { "epoch": 2.79, "learning_rate": 3.930005737234653e-08, "logits/chosen": -2.6779532432556152, "logits/rejected": -2.731102228164673, "logps/chosen": -228.0155029296875, "logps/rejected": -250.24398803710938, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 0.5090616345405579, "rewards/margins": 12.24303150177002, "rewards/rejected": -11.73397159576416, "step": 5400 }, { "epoch": 2.79, "learning_rate": 3.8343851596863644e-08, "logits/chosen": -2.699240207672119, "logits/rejected": -2.6759274005889893, "logps/chosen": -265.9540100097656, "logps/rejected": -252.60317993164062, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 1.9274028539657593, "rewards/margins": 13.91094970703125, "rewards/rejected": -11.983546257019043, "step": 5410 }, { "epoch": 2.8, "learning_rate": 3.738764582138076e-08, "logits/chosen": -2.7132983207702637, "logits/rejected": -2.699207067489624, "logps/chosen": -306.46649169921875, "logps/rejected": -347.5021057128906, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.05111388489603996, "rewards/margins": 13.25873851776123, "rewards/rejected": -13.207626342773438, "step": 5420 }, { "epoch": 2.8, "learning_rate": 3.6431440045897875e-08, "logits/chosen": -2.632061243057251, "logits/rejected": -2.628340721130371, "logps/chosen": -310.02984619140625, "logps/rejected": -364.7181701660156, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 2.396155595779419, "rewards/margins": 15.769102096557617, "rewards/rejected": -13.372945785522461, "step": 5430 }, { "epoch": 2.81, "learning_rate": 3.547523427041499e-08, "logits/chosen": -2.7925541400909424, "logits/rejected": -2.7636916637420654, "logps/chosen": -307.3076171875, "logps/rejected": -317.149169921875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 2.418766975402832, "rewards/margins": 15.514017105102539, "rewards/rejected": -13.095250129699707, "step": 5440 }, { "epoch": 2.81, "learning_rate": 3.4519028494932106e-08, "logits/chosen": -2.6972720623016357, "logits/rejected": -2.6307532787323, "logps/chosen": -246.3926239013672, "logps/rejected": -326.93157958984375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.45250099897384644, "rewards/margins": 13.748095512390137, "rewards/rejected": -13.29559326171875, "step": 5450 }, { "epoch": 2.82, "learning_rate": 3.356282271944923e-08, "logits/chosen": -2.688779354095459, "logits/rejected": -2.712975263595581, "logps/chosen": -234.59335327148438, "logps/rejected": -302.8453674316406, "loss": 0.0163, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.1471259593963623, "rewards/margins": 15.453851699829102, "rewards/rejected": -13.306724548339844, "step": 5460 }, { "epoch": 2.82, "learning_rate": 3.260661694396634e-08, "logits/chosen": -2.756401538848877, "logits/rejected": -2.809408664703369, "logps/chosen": -336.09332275390625, "logps/rejected": -258.19744873046875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 0.9356826543807983, "rewards/margins": 14.006448745727539, "rewards/rejected": -13.070767402648926, "step": 5470 }, { "epoch": 2.83, "learning_rate": 3.165041116848346e-08, "logits/chosen": -2.7278525829315186, "logits/rejected": -2.6752920150756836, "logps/chosen": -236.9536590576172, "logps/rejected": -334.90704345703125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.1287994384765625, "rewards/margins": 14.35100269317627, "rewards/rejected": -13.222201347351074, "step": 5480 }, { "epoch": 2.83, "learning_rate": 3.0694205393000576e-08, "logits/chosen": -2.6900107860565186, "logits/rejected": -2.6366961002349854, "logps/chosen": -231.33364868164062, "logps/rejected": -267.84808349609375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 3.4923148155212402, "rewards/margins": 15.316784858703613, "rewards/rejected": -11.824469566345215, "step": 5490 }, { "epoch": 2.84, "learning_rate": 2.9737999617517688e-08, "logits/chosen": -2.707181692123413, "logits/rejected": -2.603635787963867, "logps/chosen": -254.90615844726562, "logps/rejected": -268.971435546875, "loss": 0.0094, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.9463617205619812, "rewards/margins": 13.233505249023438, "rewards/rejected": -12.287142753601074, "step": 5500 }, { "epoch": 2.84, "eval_logits/chosen": -2.7012245655059814, "eval_logits/rejected": -2.698698043823242, "eval_logps/chosen": -314.4378356933594, "eval_logps/rejected": -282.4463806152344, "eval_loss": 0.7952748537063599, "eval_rewards/accuracies": 0.7579365372657776, "eval_rewards/chosen": -3.189682722091675, "eval_rewards/margins": 4.111170768737793, "eval_rewards/rejected": -7.3008527755737305, "eval_runtime": 217.7034, "eval_samples_per_second": 9.187, "eval_steps_per_second": 0.289, "step": 5500 }, { "epoch": 2.84, "learning_rate": 2.8781793842034804e-08, "logits/chosen": -2.605278491973877, "logits/rejected": -2.5629143714904785, "logps/chosen": -250.08090209960938, "logps/rejected": -277.3458557128906, "loss": 0.0147, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.06431760638952255, "rewards/margins": 13.18347454071045, "rewards/rejected": -13.247793197631836, "step": 5510 }, { "epoch": 2.85, "learning_rate": 2.782558806655192e-08, "logits/chosen": -2.6917879581451416, "logits/rejected": -2.7281289100646973, "logps/chosen": -289.0796203613281, "logps/rejected": -350.89227294921875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 1.8091856241226196, "rewards/margins": 14.719850540161133, "rewards/rejected": -12.910664558410645, "step": 5520 }, { "epoch": 2.85, "learning_rate": 2.6869382291069035e-08, "logits/chosen": -2.7241485118865967, "logits/rejected": -2.7581992149353027, "logps/chosen": -277.55157470703125, "logps/rejected": -296.58648681640625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 0.336979478597641, "rewards/margins": 13.269563674926758, "rewards/rejected": -12.932583808898926, "step": 5530 }, { "epoch": 2.86, "learning_rate": 2.591317651558615e-08, "logits/chosen": -2.6184306144714355, "logits/rejected": -2.6479744911193848, "logps/chosen": -243.2447509765625, "logps/rejected": -317.91619873046875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.5495041608810425, "rewards/margins": 13.560381889343262, "rewards/rejected": -13.01087760925293, "step": 5540 }, { "epoch": 2.87, "learning_rate": 2.4956970740103267e-08, "logits/chosen": -2.707151412963867, "logits/rejected": -2.621060609817505, "logps/chosen": -282.298095703125, "logps/rejected": -377.27313232421875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 2.2465739250183105, "rewards/margins": 17.916969299316406, "rewards/rejected": -15.67039680480957, "step": 5550 }, { "epoch": 2.87, "learning_rate": 2.4000764964620386e-08, "logits/chosen": -2.7057225704193115, "logits/rejected": -2.7142810821533203, "logps/chosen": -342.3331298828125, "logps/rejected": -351.43023681640625, "loss": 0.0102, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6923713684082031, "rewards/margins": 11.715002059936523, "rewards/rejected": -11.022631645202637, "step": 5560 }, { "epoch": 2.88, "learning_rate": 2.30445591891375e-08, "logits/chosen": -2.7297680377960205, "logits/rejected": -2.671332836151123, "logps/chosen": -309.3148498535156, "logps/rejected": -334.22381591796875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 2.074737071990967, "rewards/margins": 16.295886993408203, "rewards/rejected": -14.221150398254395, "step": 5570 }, { "epoch": 2.88, "learning_rate": 2.2088353413654617e-08, "logits/chosen": -2.514273166656494, "logits/rejected": -2.5561318397521973, "logps/chosen": -222.4387969970703, "logps/rejected": -260.14617919921875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 2.7343811988830566, "rewards/margins": 16.719032287597656, "rewards/rejected": -13.984651565551758, "step": 5580 }, { "epoch": 2.89, "learning_rate": 2.1132147638171733e-08, "logits/chosen": -2.562824249267578, "logits/rejected": -2.6268982887268066, "logps/chosen": -324.80877685546875, "logps/rejected": -313.646484375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 3.58955717086792, "rewards/margins": 17.370824813842773, "rewards/rejected": -13.781267166137695, "step": 5590 }, { "epoch": 2.89, "learning_rate": 2.0175941862688848e-08, "logits/chosen": -2.6945369243621826, "logits/rejected": -2.6687333583831787, "logps/chosen": -249.3484344482422, "logps/rejected": -313.9985656738281, "loss": 0.0538, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.41767793893814087, "rewards/margins": 13.5536527633667, "rewards/rejected": -13.13597583770752, "step": 5600 }, { "epoch": 2.9, "learning_rate": 1.9219736087205964e-08, "logits/chosen": -2.642052173614502, "logits/rejected": -2.716784715652466, "logps/chosen": -241.4844970703125, "logps/rejected": -301.21759033203125, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 2.055972099304199, "rewards/margins": 14.421625137329102, "rewards/rejected": -12.365653038024902, "step": 5610 }, { "epoch": 2.9, "learning_rate": 1.826353031172308e-08, "logits/chosen": -2.6660943031311035, "logits/rejected": -2.667335033416748, "logps/chosen": -356.5085754394531, "logps/rejected": -324.35858154296875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.6099711060523987, "rewards/margins": 12.738626480102539, "rewards/rejected": -12.128656387329102, "step": 5620 }, { "epoch": 2.91, "learning_rate": 1.73073245362402e-08, "logits/chosen": -2.624074935913086, "logits/rejected": -2.5837934017181396, "logps/chosen": -310.8951721191406, "logps/rejected": -266.6996154785156, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 1.0079091787338257, "rewards/margins": 14.171859741210938, "rewards/rejected": -13.163949966430664, "step": 5630 }, { "epoch": 2.91, "learning_rate": 1.6351118760757314e-08, "logits/chosen": -2.663119077682495, "logits/rejected": -2.6472928524017334, "logps/chosen": -273.03973388671875, "logps/rejected": -245.8977813720703, "loss": 0.0165, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.0619621276855469, "rewards/margins": 12.637825012207031, "rewards/rejected": -11.575862884521484, "step": 5640 }, { "epoch": 2.92, "learning_rate": 1.539491298527443e-08, "logits/chosen": -2.67164945602417, "logits/rejected": -2.636287212371826, "logps/chosen": -229.2053680419922, "logps/rejected": -299.718994140625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.481825590133667, "rewards/margins": 13.588674545288086, "rewards/rejected": -13.106849670410156, "step": 5650 }, { "epoch": 2.92, "learning_rate": 1.4438707209791546e-08, "logits/chosen": -2.6152384281158447, "logits/rejected": -2.8120269775390625, "logps/chosen": -359.27978515625, "logps/rejected": -286.0852966308594, "loss": 0.0174, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.058980107307434, "rewards/margins": 12.662736892700195, "rewards/rejected": -11.60375690460205, "step": 5660 }, { "epoch": 2.93, "learning_rate": 1.3482501434308661e-08, "logits/chosen": -2.5586142539978027, "logits/rejected": -2.6513123512268066, "logps/chosen": -300.5698547363281, "logps/rejected": -245.2171173095703, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 1.214424967765808, "rewards/margins": 12.673861503601074, "rewards/rejected": -11.459436416625977, "step": 5670 }, { "epoch": 2.93, "learning_rate": 1.2526295658825777e-08, "logits/chosen": -2.7828521728515625, "logits/rejected": -2.752152919769287, "logps/chosen": -265.4630126953125, "logps/rejected": -351.28350830078125, "loss": 0.0176, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.451908826828003, "rewards/margins": 15.984410285949707, "rewards/rejected": -14.532501220703125, "step": 5680 }, { "epoch": 2.94, "learning_rate": 1.1570089883342895e-08, "logits/chosen": -2.6207876205444336, "logits/rejected": -2.6226754188537598, "logps/chosen": -309.70770263671875, "logps/rejected": -365.6626892089844, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 1.7145103216171265, "rewards/margins": 16.66482925415039, "rewards/rejected": -14.950319290161133, "step": 5690 }, { "epoch": 2.94, "learning_rate": 1.061388410786001e-08, "logits/chosen": -2.6478333473205566, "logits/rejected": -2.686779022216797, "logps/chosen": -259.3996887207031, "logps/rejected": -246.5650634765625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 1.815895438194275, "rewards/margins": 12.701074600219727, "rewards/rejected": -10.88517951965332, "step": 5700 }, { "epoch": 2.95, "learning_rate": 9.657678332377126e-09, "logits/chosen": -2.6393017768859863, "logits/rejected": -2.7247872352600098, "logps/chosen": -262.43060302734375, "logps/rejected": -258.43292236328125, "loss": 0.011, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.0494627952575684, "rewards/margins": 14.03632640838623, "rewards/rejected": -11.98686408996582, "step": 5710 }, { "epoch": 2.95, "learning_rate": 8.701472556894243e-09, "logits/chosen": -2.6285250186920166, "logits/rejected": -2.681663990020752, "logps/chosen": -244.53811645507812, "logps/rejected": -280.70391845703125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.3365360200405121, "rewards/margins": 13.008634567260742, "rewards/rejected": -13.345170974731445, "step": 5720 }, { "epoch": 2.96, "learning_rate": 7.745266781411359e-09, "logits/chosen": -2.69207501411438, "logits/rejected": -2.71191668510437, "logps/chosen": -271.1983947753906, "logps/rejected": -309.11407470703125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.11351821571588516, "rewards/margins": 13.056490898132324, "rewards/rejected": -13.170010566711426, "step": 5730 }, { "epoch": 2.96, "learning_rate": 6.7890610059284754e-09, "logits/chosen": -2.6312122344970703, "logits/rejected": -2.6347126960754395, "logps/chosen": -217.0697784423828, "logps/rejected": -255.55484008789062, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 1.3219164609909058, "rewards/margins": 12.84302043914795, "rewards/rejected": -11.52110481262207, "step": 5740 }, { "epoch": 2.97, "learning_rate": 5.832855230445592e-09, "logits/chosen": -2.6520233154296875, "logits/rejected": -2.6504294872283936, "logps/chosen": -268.7332763671875, "logps/rejected": -300.84686279296875, "loss": 0.0121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 2.144339084625244, "rewards/margins": 15.850810050964355, "rewards/rejected": -13.706469535827637, "step": 5750 }, { "epoch": 2.97, "learning_rate": 4.8766494549627085e-09, "logits/chosen": -2.6992509365081787, "logits/rejected": -2.681377410888672, "logps/chosen": -273.0556335449219, "logps/rejected": -297.84661865234375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 3.6503357887268066, "rewards/margins": 17.637630462646484, "rewards/rejected": -13.987295150756836, "step": 5760 }, { "epoch": 2.98, "learning_rate": 3.920443679479824e-09, "logits/chosen": -2.778294086456299, "logits/rejected": -2.667935609817505, "logps/chosen": -249.29043579101562, "logps/rejected": -267.514404296875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 0.5016997456550598, "rewards/margins": 12.95671272277832, "rewards/rejected": -12.455013275146484, "step": 5770 }, { "epoch": 2.98, "learning_rate": 2.96423790399694e-09, "logits/chosen": -2.723689317703247, "logits/rejected": -2.811107635498047, "logps/chosen": -278.9635925292969, "logps/rejected": -307.65924072265625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.774082362651825, "rewards/margins": 12.066235542297363, "rewards/rejected": -11.292154312133789, "step": 5780 }, { "epoch": 2.99, "learning_rate": 2.008032128514056e-09, "logits/chosen": -2.688815116882324, "logits/rejected": -2.7489705085754395, "logps/chosen": -284.84881591796875, "logps/rejected": -345.14422607421875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.01807079277932644, "rewards/margins": 12.609796524047852, "rewards/rejected": -12.591726303100586, "step": 5790 }, { "epoch": 2.99, "learning_rate": 1.0518263530311723e-09, "logits/chosen": -2.7407386302948, "logits/rejected": -2.794053554534912, "logps/chosen": -225.2655029296875, "logps/rejected": -304.1062316894531, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 1.2420426607131958, "rewards/margins": 12.693281173706055, "rewards/rejected": -11.451238632202148, "step": 5800 }, { "epoch": 3.0, "learning_rate": 9.562057754828839e-11, "logits/chosen": -2.6969985961914062, "logits/rejected": -2.7415974140167236, "logps/chosen": -257.86944580078125, "logps/rejected": -356.72320556640625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 1.3919858932495117, "rewards/margins": 12.642386436462402, "rewards/rejected": -11.250399589538574, "step": 5810 }, { "epoch": 3.0, "step": 5811, "total_flos": 0.0, "train_loss": 0.22832600153418584, "train_runtime": 42683.4293, "train_samples_per_second": 4.355, "train_steps_per_second": 0.136 } ], "logging_steps": 10, "max_steps": 5811, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }