zephyr-7b-dpo-lora / trainer_state.json
wenzw's picture
Model save
f0b2539
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9993222089532967,
"eval_steps": 100,
"global_step": 2904,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.7182130584192438e-09,
"logits/chosen": -2.447075843811035,
"logits/rejected": -2.526996612548828,
"logps/chosen": -235.39663696289062,
"logps/rejected": -214.08815002441406,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.718213058419244e-08,
"logits/chosen": -2.487886667251587,
"logits/rejected": -2.427130699157715,
"logps/chosen": -280.10888671875,
"logps/rejected": -230.16168212890625,
"loss": 0.691,
"rewards/accuracies": 0.4722222089767456,
"rewards/chosen": 0.0025838064029812813,
"rewards/margins": 0.0049818274565041065,
"rewards/rejected": -0.0023980215191841125,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 3.436426116838488e-08,
"logits/chosen": -2.41877818107605,
"logits/rejected": -2.356771230697632,
"logps/chosen": -255.56265258789062,
"logps/rejected": -226.37399291992188,
"loss": 0.6932,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.001528903958387673,
"rewards/margins": 0.0006666237604804337,
"rewards/rejected": 0.0008622803725302219,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 5.154639175257731e-08,
"logits/chosen": -2.42828369140625,
"logits/rejected": -2.4059910774230957,
"logps/chosen": -272.57012939453125,
"logps/rejected": -227.35250854492188,
"loss": 0.6945,
"rewards/accuracies": 0.4921875,
"rewards/chosen": -0.001070805243216455,
"rewards/margins": -0.0018140410538762808,
"rewards/rejected": 0.000743235694244504,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 6.872852233676976e-08,
"logits/chosen": -2.425325870513916,
"logits/rejected": -2.374124050140381,
"logps/chosen": -249.1795654296875,
"logps/rejected": -220.6439971923828,
"loss": 0.6935,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.0025015759747475386,
"rewards/margins": 8.866000280249864e-05,
"rewards/rejected": 0.0024129163939505816,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 8.59106529209622e-08,
"logits/chosen": -2.4614310264587402,
"logits/rejected": -2.416882038116455,
"logps/chosen": -259.7109680175781,
"logps/rejected": -220.2974090576172,
"loss": 0.6917,
"rewards/accuracies": 0.5078125,
"rewards/chosen": 0.0015415346715599298,
"rewards/margins": 0.003707319498062134,
"rewards/rejected": -0.0021657845936715603,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.0309278350515462e-07,
"logits/chosen": -2.462627649307251,
"logits/rejected": -2.4049839973449707,
"logps/chosen": -259.0118713378906,
"logps/rejected": -228.43917846679688,
"loss": 0.6927,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.002671582391485572,
"rewards/margins": 0.0019277830142527819,
"rewards/rejected": 0.0007437997264787555,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 1.202749140893471e-07,
"logits/chosen": -2.4417717456817627,
"logits/rejected": -2.4220786094665527,
"logps/chosen": -267.39825439453125,
"logps/rejected": -210.96157836914062,
"loss": 0.692,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.001063968287780881,
"rewards/margins": 0.002977523719891906,
"rewards/rejected": -0.001913555315695703,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 1.3745704467353952e-07,
"logits/chosen": -2.453876495361328,
"logits/rejected": -2.3886351585388184,
"logps/chosen": -280.5273132324219,
"logps/rejected": -225.0200653076172,
"loss": 0.6929,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.0006787125021219254,
"rewards/margins": 0.0013104949612170458,
"rewards/rejected": -0.0006317828083410859,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 1.5463917525773197e-07,
"logits/chosen": -2.4767956733703613,
"logits/rejected": -2.3978798389434814,
"logps/chosen": -271.4781799316406,
"logps/rejected": -231.6018524169922,
"loss": 0.6932,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": -0.00017936174117494375,
"rewards/margins": 0.0006834475207142532,
"rewards/rejected": -0.0008628091891296208,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 1.718213058419244e-07,
"logits/chosen": -2.4933345317840576,
"logits/rejected": -2.397916555404663,
"logps/chosen": -265.00872802734375,
"logps/rejected": -215.407470703125,
"loss": 0.6917,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.0026833172887563705,
"rewards/margins": 0.003812385257333517,
"rewards/rejected": -0.0011290680849924684,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 1.8900343642611682e-07,
"logits/chosen": -2.4396605491638184,
"logits/rejected": -2.366703748703003,
"logps/chosen": -283.7935791015625,
"logps/rejected": -214.5601806640625,
"loss": 0.6894,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.0025542343501001596,
"rewards/margins": 0.00826872419565916,
"rewards/rejected": -0.005714490078389645,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 2.0618556701030925e-07,
"logits/chosen": -2.4569156169891357,
"logits/rejected": -2.429029703140259,
"logps/chosen": -271.7438049316406,
"logps/rejected": -229.4224395751953,
"loss": 0.6882,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.004260816611349583,
"rewards/margins": 0.010780954733490944,
"rewards/rejected": -0.0065201385878026485,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 2.2336769759450173e-07,
"logits/chosen": -2.452051877975464,
"logits/rejected": -2.3855373859405518,
"logps/chosen": -267.55743408203125,
"logps/rejected": -212.14273071289062,
"loss": 0.691,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.0025894823484122753,
"rewards/margins": 0.005025609862059355,
"rewards/rejected": -0.0024361279793083668,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 2.405498281786942e-07,
"logits/chosen": -2.4718971252441406,
"logits/rejected": -2.417950391769409,
"logps/chosen": -274.26593017578125,
"logps/rejected": -212.1128692626953,
"loss": 0.6894,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.0036728009581565857,
"rewards/margins": 0.008318398147821426,
"rewards/rejected": -0.004645597655326128,
"step": 140
},
{
"epoch": 0.15,
"learning_rate": 2.5773195876288655e-07,
"logits/chosen": -2.419431209564209,
"logits/rejected": -2.3849945068359375,
"logps/chosen": -250.10806274414062,
"logps/rejected": -210.3776397705078,
"loss": 0.6897,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.0029598295222967863,
"rewards/margins": 0.007620878517627716,
"rewards/rejected": -0.004661048296838999,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 2.7491408934707903e-07,
"logits/chosen": -2.4403343200683594,
"logits/rejected": -2.378030776977539,
"logps/chosen": -267.47332763671875,
"logps/rejected": -218.4069061279297,
"loss": 0.6891,
"rewards/accuracies": 0.546875,
"rewards/chosen": 0.0031638103537261486,
"rewards/margins": 0.009145173244178295,
"rewards/rejected": -0.0059813628904521465,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 2.9209621993127146e-07,
"logits/chosen": -2.4039931297302246,
"logits/rejected": -2.3714652061462402,
"logps/chosen": -277.943359375,
"logps/rejected": -221.7199249267578,
"loss": 0.6873,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.008871063590049744,
"rewards/margins": 0.012961235828697681,
"rewards/rejected": -0.004090171307325363,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 3.0927835051546394e-07,
"logits/chosen": -2.41255784034729,
"logits/rejected": -2.382023572921753,
"logps/chosen": -271.4554443359375,
"logps/rejected": -226.9301300048828,
"loss": 0.6874,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.005444863811135292,
"rewards/margins": 0.01286339946091175,
"rewards/rejected": -0.007418536581099033,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 3.2646048109965636e-07,
"logits/chosen": -2.4459285736083984,
"logits/rejected": -2.394118547439575,
"logps/chosen": -276.55389404296875,
"logps/rejected": -222.62655639648438,
"loss": 0.6821,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.01423065084964037,
"rewards/margins": 0.02362729236483574,
"rewards/rejected": -0.009396640583872795,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 3.436426116838488e-07,
"logits/chosen": -2.4238436222076416,
"logits/rejected": -2.393543243408203,
"logps/chosen": -249.68899536132812,
"logps/rejected": -214.36233520507812,
"loss": 0.6785,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.014417588710784912,
"rewards/margins": 0.0309614147990942,
"rewards/rejected": -0.01654382422566414,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 3.608247422680412e-07,
"logits/chosen": -2.4502434730529785,
"logits/rejected": -2.4075448513031006,
"logps/chosen": -270.61175537109375,
"logps/rejected": -235.2810516357422,
"loss": 0.6815,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 0.01293298788368702,
"rewards/margins": 0.025450533255934715,
"rewards/rejected": -0.01251754630357027,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 3.7800687285223364e-07,
"logits/chosen": -2.414132595062256,
"logits/rejected": -2.364130735397339,
"logps/chosen": -263.3313903808594,
"logps/rejected": -219.0230712890625,
"loss": 0.6793,
"rewards/accuracies": 0.604687511920929,
"rewards/chosen": 0.014896327629685402,
"rewards/margins": 0.030013080686330795,
"rewards/rejected": -0.015116755850613117,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 3.9518900343642607e-07,
"logits/chosen": -2.4107182025909424,
"logits/rejected": -2.3757405281066895,
"logps/chosen": -273.1572265625,
"logps/rejected": -231.4423065185547,
"loss": 0.6761,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.015099003911018372,
"rewards/margins": 0.037129949778318405,
"rewards/rejected": -0.022030945867300034,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 4.123711340206185e-07,
"logits/chosen": -2.4387900829315186,
"logits/rejected": -2.396888256072998,
"logps/chosen": -271.6656799316406,
"logps/rejected": -233.677734375,
"loss": 0.6727,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.021000446751713753,
"rewards/margins": 0.04467698931694031,
"rewards/rejected": -0.023676546290516853,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 4.2955326460481097e-07,
"logits/chosen": -2.431246519088745,
"logits/rejected": -2.461184501647949,
"logps/chosen": -264.908447265625,
"logps/rejected": -225.65451049804688,
"loss": 0.6699,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": 0.022870570421218872,
"rewards/margins": 0.05118563771247864,
"rewards/rejected": -0.028315063565969467,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 4.4673539518900345e-07,
"logits/chosen": -2.409027576446533,
"logits/rejected": -2.4082815647125244,
"logps/chosen": -249.64242553710938,
"logps/rejected": -204.5191650390625,
"loss": 0.6666,
"rewards/accuracies": 0.676562488079071,
"rewards/chosen": 0.02077900990843773,
"rewards/margins": 0.05811852216720581,
"rewards/rejected": -0.03733951598405838,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 4.639175257731959e-07,
"logits/chosen": -2.4640724658966064,
"logits/rejected": -2.438767910003662,
"logps/chosen": -281.8011169433594,
"logps/rejected": -224.46932983398438,
"loss": 0.6628,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.025280708447098732,
"rewards/margins": 0.06713660806417465,
"rewards/rejected": -0.04185590520501137,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 4.810996563573884e-07,
"logits/chosen": -2.441326141357422,
"logits/rejected": -2.3782386779785156,
"logps/chosen": -266.28228759765625,
"logps/rejected": -217.6759796142578,
"loss": 0.6539,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": 0.02818796969950199,
"rewards/margins": 0.08737680315971375,
"rewards/rejected": -0.059188831597566605,
"step": 280
},
{
"epoch": 0.3,
"learning_rate": 4.982817869415807e-07,
"logits/chosen": -2.4530272483825684,
"logits/rejected": -2.4197421073913574,
"logps/chosen": -251.4274444580078,
"logps/rejected": -206.58395385742188,
"loss": 0.6597,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 0.019450683146715164,
"rewards/margins": 0.07725103944540024,
"rewards/rejected": -0.05780036002397537,
"step": 290
},
{
"epoch": 0.31,
"learning_rate": 4.982778415614236e-07,
"logits/chosen": -2.4357409477233887,
"logits/rejected": -2.401296615600586,
"logps/chosen": -258.9688415527344,
"logps/rejected": -214.4955291748047,
"loss": 0.6529,
"rewards/accuracies": 0.6796875,
"rewards/chosen": 0.02522132731974125,
"rewards/margins": 0.09243801981210709,
"rewards/rejected": -0.06721669435501099,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 4.963643321852277e-07,
"logits/chosen": -2.433469772338867,
"logits/rejected": -2.397340774536133,
"logps/chosen": -264.56365966796875,
"logps/rejected": -223.6669464111328,
"loss": 0.6494,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": 0.027543241158127785,
"rewards/margins": 0.10247315466403961,
"rewards/rejected": -0.07492991536855698,
"step": 310
},
{
"epoch": 0.33,
"learning_rate": 4.944508228090318e-07,
"logits/chosen": -2.4279608726501465,
"logits/rejected": -2.383455514907837,
"logps/chosen": -268.522216796875,
"logps/rejected": -215.8023223876953,
"loss": 0.643,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": 0.032382432371377945,
"rewards/margins": 0.11742101609706879,
"rewards/rejected": -0.08503858745098114,
"step": 320
},
{
"epoch": 0.34,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": -2.483980178833008,
"logits/rejected": -2.4091663360595703,
"logps/chosen": -266.2663879394531,
"logps/rejected": -230.7337188720703,
"loss": 0.6403,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.03822886198759079,
"rewards/margins": 0.12609949707984924,
"rewards/rejected": -0.08787062764167786,
"step": 330
},
{
"epoch": 0.35,
"learning_rate": 4.906238040566398e-07,
"logits/chosen": -2.437373161315918,
"logits/rejected": -2.3692476749420166,
"logps/chosen": -252.1580047607422,
"logps/rejected": -221.46554565429688,
"loss": 0.6414,
"rewards/accuracies": 0.682812511920929,
"rewards/chosen": 0.034671518951654434,
"rewards/margins": 0.12736742198467255,
"rewards/rejected": -0.09269589185714722,
"step": 340
},
{
"epoch": 0.36,
"learning_rate": 4.887102946804438e-07,
"logits/chosen": -2.457171678543091,
"logits/rejected": -2.3946237564086914,
"logps/chosen": -263.380615234375,
"logps/rejected": -218.726318359375,
"loss": 0.6377,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.023257676512002945,
"rewards/margins": 0.13810031116008759,
"rewards/rejected": -0.11484263837337494,
"step": 350
},
{
"epoch": 0.37,
"learning_rate": 4.867967853042479e-07,
"logits/chosen": -2.4557504653930664,
"logits/rejected": -2.4013724327087402,
"logps/chosen": -267.2643737792969,
"logps/rejected": -222.85366821289062,
"loss": 0.6286,
"rewards/accuracies": 0.6484375,
"rewards/chosen": 0.03796042129397392,
"rewards/margins": 0.160946324467659,
"rewards/rejected": -0.12298589944839478,
"step": 360
},
{
"epoch": 0.38,
"learning_rate": 4.84883275928052e-07,
"logits/chosen": -2.4332971572875977,
"logits/rejected": -2.421247959136963,
"logps/chosen": -266.8581237792969,
"logps/rejected": -235.67788696289062,
"loss": 0.6366,
"rewards/accuracies": 0.6734374761581421,
"rewards/chosen": 0.01746644265949726,
"rewards/margins": 0.14841753244400024,
"rewards/rejected": -0.13095109164714813,
"step": 370
},
{
"epoch": 0.39,
"learning_rate": 4.82969766551856e-07,
"logits/chosen": -2.417196750640869,
"logits/rejected": -2.37961483001709,
"logps/chosen": -261.7236633300781,
"logps/rejected": -229.08639526367188,
"loss": 0.6354,
"rewards/accuracies": 0.651562511920929,
"rewards/chosen": 0.01633612811565399,
"rewards/margins": 0.1533532738685608,
"rewards/rejected": -0.1370171457529068,
"step": 380
},
{
"epoch": 0.4,
"learning_rate": 4.810562571756601e-07,
"logits/chosen": -2.4581520557403564,
"logits/rejected": -2.3880105018615723,
"logps/chosen": -263.3890686035156,
"logps/rejected": -218.2093505859375,
"loss": 0.6132,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": 0.046173859387636185,
"rewards/margins": 0.2001974880695343,
"rewards/rejected": -0.15402361750602722,
"step": 390
},
{
"epoch": 0.41,
"learning_rate": 4.791427477994642e-07,
"logits/chosen": -2.4509260654449463,
"logits/rejected": -2.4113948345184326,
"logps/chosen": -270.0736083984375,
"logps/rejected": -221.9901123046875,
"loss": 0.6236,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": 0.04222818464040756,
"rewards/margins": 0.1874697059392929,
"rewards/rejected": -0.14524152874946594,
"step": 400
},
{
"epoch": 0.42,
"learning_rate": 4.772292384232682e-07,
"logits/chosen": -2.4471030235290527,
"logits/rejected": -2.4141643047332764,
"logps/chosen": -261.27337646484375,
"logps/rejected": -230.60299682617188,
"loss": 0.6294,
"rewards/accuracies": 0.6546875238418579,
"rewards/chosen": 0.021040040999650955,
"rewards/margins": 0.18236112594604492,
"rewards/rejected": -0.16132107377052307,
"step": 410
},
{
"epoch": 0.43,
"learning_rate": 4.753157290470723e-07,
"logits/chosen": -2.460665464401245,
"logits/rejected": -2.4335570335388184,
"logps/chosen": -276.3302917480469,
"logps/rejected": -226.70639038085938,
"loss": 0.6125,
"rewards/accuracies": 0.6859375238418579,
"rewards/chosen": 0.026431281119585037,
"rewards/margins": 0.2242995798587799,
"rewards/rejected": -0.19786831736564636,
"step": 420
},
{
"epoch": 0.44,
"learning_rate": 4.7340221967087635e-07,
"logits/chosen": -2.4207959175109863,
"logits/rejected": -2.383884906768799,
"logps/chosen": -250.6901397705078,
"logps/rejected": -207.92062377929688,
"loss": 0.6128,
"rewards/accuracies": 0.6953125,
"rewards/chosen": 0.01974855735898018,
"rewards/margins": 0.21391530334949493,
"rewards/rejected": -0.19416674971580505,
"step": 430
},
{
"epoch": 0.45,
"learning_rate": 4.714887102946804e-07,
"logits/chosen": -2.457104206085205,
"logits/rejected": -2.3864612579345703,
"logps/chosen": -274.47650146484375,
"logps/rejected": -225.985107421875,
"loss": 0.6012,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": 0.04322098195552826,
"rewards/margins": 0.24149248003959656,
"rewards/rejected": -0.19827154278755188,
"step": 440
},
{
"epoch": 0.46,
"learning_rate": 4.6957520091848447e-07,
"logits/chosen": -2.4684674739837646,
"logits/rejected": -2.432194948196411,
"logps/chosen": -262.0184020996094,
"logps/rejected": -226.8969268798828,
"loss": 0.6132,
"rewards/accuracies": 0.6703125238418579,
"rewards/chosen": 0.012870723381638527,
"rewards/margins": 0.226064994931221,
"rewards/rejected": -0.21319429576396942,
"step": 450
},
{
"epoch": 0.48,
"learning_rate": 4.6766169154228853e-07,
"logits/chosen": -2.4258971214294434,
"logits/rejected": -2.3564021587371826,
"logps/chosen": -256.30084228515625,
"logps/rejected": -219.12112426757812,
"loss": 0.6188,
"rewards/accuracies": 0.6734374761581421,
"rewards/chosen": -0.004575688857585192,
"rewards/margins": 0.21516656875610352,
"rewards/rejected": -0.21974226832389832,
"step": 460
},
{
"epoch": 0.49,
"learning_rate": 4.657481821660926e-07,
"logits/chosen": -2.4722161293029785,
"logits/rejected": -2.4338574409484863,
"logps/chosen": -279.41644287109375,
"logps/rejected": -232.3635711669922,
"loss": 0.6072,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.012542584910988808,
"rewards/margins": 0.2474808394908905,
"rewards/rejected": -0.23493823409080505,
"step": 470
},
{
"epoch": 0.5,
"learning_rate": 4.6383467278989666e-07,
"logits/chosen": -2.381640911102295,
"logits/rejected": -2.4078078269958496,
"logps/chosen": -262.5255126953125,
"logps/rejected": -226.96853637695312,
"loss": 0.5953,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": 0.03892933949828148,
"rewards/margins": 0.2794772982597351,
"rewards/rejected": -0.24054794013500214,
"step": 480
},
{
"epoch": 0.51,
"learning_rate": 4.6192116341370067e-07,
"logits/chosen": -2.436652421951294,
"logits/rejected": -2.3565993309020996,
"logps/chosen": -263.81829833984375,
"logps/rejected": -223.61801147460938,
"loss": 0.5897,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": 0.016417725011706352,
"rewards/margins": 0.2946879267692566,
"rewards/rejected": -0.2782701849937439,
"step": 490
},
{
"epoch": 0.52,
"learning_rate": 4.6000765403750473e-07,
"logits/chosen": -2.3938071727752686,
"logits/rejected": -2.37441086769104,
"logps/chosen": -273.1866760253906,
"logps/rejected": -219.1422576904297,
"loss": 0.5987,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": 0.022179026156663895,
"rewards/margins": 0.27144354581832886,
"rewards/rejected": -0.24926450848579407,
"step": 500
},
{
"epoch": 0.53,
"learning_rate": 4.580941446613088e-07,
"logits/chosen": -2.438375473022461,
"logits/rejected": -2.4063642024993896,
"logps/chosen": -268.3760681152344,
"logps/rejected": -213.6297607421875,
"loss": 0.5779,
"rewards/accuracies": 0.7265625,
"rewards/chosen": 0.025703424587845802,
"rewards/margins": 0.3301311433315277,
"rewards/rejected": -0.30442774295806885,
"step": 510
},
{
"epoch": 0.54,
"learning_rate": 4.5618063528511285e-07,
"logits/chosen": -2.4285857677459717,
"logits/rejected": -2.3742969036102295,
"logps/chosen": -270.7893371582031,
"logps/rejected": -229.7726593017578,
"loss": 0.5886,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.017922762781381607,
"rewards/margins": 0.3229644298553467,
"rewards/rejected": -0.305041640996933,
"step": 520
},
{
"epoch": 0.55,
"learning_rate": 4.542671259089169e-07,
"logits/chosen": -2.4130568504333496,
"logits/rejected": -2.3629188537597656,
"logps/chosen": -272.3194885253906,
"logps/rejected": -231.18997192382812,
"loss": 0.5947,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": 0.015474101528525352,
"rewards/margins": 0.3130945861339569,
"rewards/rejected": -0.2976204752922058,
"step": 530
},
{
"epoch": 0.56,
"learning_rate": 4.52353616532721e-07,
"logits/chosen": -2.443058490753174,
"logits/rejected": -2.3707220554351807,
"logps/chosen": -265.5616760253906,
"logps/rejected": -224.46688842773438,
"loss": 0.5945,
"rewards/accuracies": 0.6796875,
"rewards/chosen": -0.0283407811075449,
"rewards/margins": 0.30165895819664,
"rewards/rejected": -0.32999974489212036,
"step": 540
},
{
"epoch": 0.57,
"learning_rate": 4.5044010715652504e-07,
"logits/chosen": -2.459993362426758,
"logits/rejected": -2.4190433025360107,
"logps/chosen": -262.33197021484375,
"logps/rejected": -231.3585662841797,
"loss": 0.5988,
"rewards/accuracies": 0.6953125,
"rewards/chosen": 0.00041465210961177945,
"rewards/margins": 0.29224497079849243,
"rewards/rejected": -0.29183030128479004,
"step": 550
},
{
"epoch": 0.58,
"learning_rate": 4.485265977803291e-07,
"logits/chosen": -2.3841280937194824,
"logits/rejected": -2.3862245082855225,
"logps/chosen": -268.51177978515625,
"logps/rejected": -231.3872833251953,
"loss": 0.5903,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.008536433801054955,
"rewards/margins": 0.32063713669776917,
"rewards/rejected": -0.31210070848464966,
"step": 560
},
{
"epoch": 0.59,
"learning_rate": 4.4661308840413316e-07,
"logits/chosen": -2.4563305377960205,
"logits/rejected": -2.423436403274536,
"logps/chosen": -267.9896545410156,
"logps/rejected": -222.6366729736328,
"loss": 0.5746,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": 0.011319964192807674,
"rewards/margins": 0.36020052433013916,
"rewards/rejected": -0.3488805890083313,
"step": 570
},
{
"epoch": 0.6,
"learning_rate": 4.446995790279372e-07,
"logits/chosen": -2.4537854194641113,
"logits/rejected": -2.3811707496643066,
"logps/chosen": -270.5040588378906,
"logps/rejected": -231.43017578125,
"loss": 0.574,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": 0.019544053822755814,
"rewards/margins": 0.3704259693622589,
"rewards/rejected": -0.3508819341659546,
"step": 580
},
{
"epoch": 0.61,
"learning_rate": 4.4278606965174123e-07,
"logits/chosen": -2.447350025177002,
"logits/rejected": -2.388247013092041,
"logps/chosen": -271.6213684082031,
"logps/rejected": -223.79696655273438,
"loss": 0.5809,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": 0.0032621710561215878,
"rewards/margins": 0.367009699344635,
"rewards/rejected": -0.3637475371360779,
"step": 590
},
{
"epoch": 0.62,
"learning_rate": 4.408725602755453e-07,
"logits/chosen": -2.457573175430298,
"logits/rejected": -2.429401397705078,
"logps/chosen": -266.47222900390625,
"logps/rejected": -231.684814453125,
"loss": 0.5816,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.007931029424071312,
"rewards/margins": 0.3559093475341797,
"rewards/rejected": -0.36384040117263794,
"step": 600
},
{
"epoch": 0.63,
"learning_rate": 4.3895905089934936e-07,
"logits/chosen": -2.4467155933380127,
"logits/rejected": -2.4398138523101807,
"logps/chosen": -280.1789855957031,
"logps/rejected": -237.6522216796875,
"loss": 0.5711,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": 0.0163104385137558,
"rewards/margins": 0.3898230493068695,
"rewards/rejected": -0.3735126256942749,
"step": 610
},
{
"epoch": 0.64,
"learning_rate": 4.370455415231534e-07,
"logits/chosen": -2.4456491470336914,
"logits/rejected": -2.397401809692383,
"logps/chosen": -257.31146240234375,
"logps/rejected": -213.8458709716797,
"loss": 0.5746,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": 0.0058257849887013435,
"rewards/margins": 0.402109295129776,
"rewards/rejected": -0.39628344774246216,
"step": 620
},
{
"epoch": 0.65,
"learning_rate": 4.351320321469575e-07,
"logits/chosen": -2.455310821533203,
"logits/rejected": -2.4044442176818848,
"logps/chosen": -269.50531005859375,
"logps/rejected": -223.09915161132812,
"loss": 0.5824,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.03774386644363403,
"rewards/margins": 0.37117189168930054,
"rewards/rejected": -0.40891575813293457,
"step": 630
},
{
"epoch": 0.66,
"learning_rate": 4.3321852277076154e-07,
"logits/chosen": -2.429537773132324,
"logits/rejected": -2.4004569053649902,
"logps/chosen": -278.3745422363281,
"logps/rejected": -238.91348266601562,
"loss": 0.5602,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.02088163048028946,
"rewards/margins": 0.4280461370944977,
"rewards/rejected": -0.4489278197288513,
"step": 640
},
{
"epoch": 0.67,
"learning_rate": 4.313050133945656e-07,
"logits/chosen": -2.462010622024536,
"logits/rejected": -2.382342576980591,
"logps/chosen": -274.82489013671875,
"logps/rejected": -228.21871948242188,
"loss": 0.5689,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.027593884617090225,
"rewards/margins": 0.4239775538444519,
"rewards/rejected": -0.451571524143219,
"step": 650
},
{
"epoch": 0.68,
"learning_rate": 4.2939150401836967e-07,
"logits/chosen": -2.408452033996582,
"logits/rejected": -2.367763042449951,
"logps/chosen": -279.24713134765625,
"logps/rejected": -234.92257690429688,
"loss": 0.5812,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": -0.029710102826356888,
"rewards/margins": 0.3825686275959015,
"rewards/rejected": -0.4122787117958069,
"step": 660
},
{
"epoch": 0.69,
"learning_rate": 4.2747799464217373e-07,
"logits/chosen": -2.400705575942993,
"logits/rejected": -2.386396884918213,
"logps/chosen": -265.80059814453125,
"logps/rejected": -221.22183227539062,
"loss": 0.5795,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.01276162825524807,
"rewards/margins": 0.3836382031440735,
"rewards/rejected": -0.3963998258113861,
"step": 670
},
{
"epoch": 0.7,
"learning_rate": 4.255644852659778e-07,
"logits/chosen": -2.434727191925049,
"logits/rejected": -2.3701629638671875,
"logps/chosen": -265.0262145996094,
"logps/rejected": -230.69918823242188,
"loss": 0.5857,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.052033863961696625,
"rewards/margins": 0.3843652307987213,
"rewards/rejected": -0.43639907240867615,
"step": 680
},
{
"epoch": 0.71,
"learning_rate": 4.236509758897818e-07,
"logits/chosen": -2.4166369438171387,
"logits/rejected": -2.3753108978271484,
"logps/chosen": -263.7073059082031,
"logps/rejected": -228.69186401367188,
"loss": 0.5598,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.031519632786512375,
"rewards/margins": 0.4231399893760681,
"rewards/rejected": -0.454659640789032,
"step": 690
},
{
"epoch": 0.72,
"learning_rate": 4.2173746651358586e-07,
"logits/chosen": -2.494065761566162,
"logits/rejected": -2.3916873931884766,
"logps/chosen": -277.77325439453125,
"logps/rejected": -226.1985321044922,
"loss": 0.5795,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.04932181164622307,
"rewards/margins": 0.41205042600631714,
"rewards/rejected": -0.4613722264766693,
"step": 700
},
{
"epoch": 0.73,
"learning_rate": 4.198239571373899e-07,
"logits/chosen": -2.431324005126953,
"logits/rejected": -2.4029393196105957,
"logps/chosen": -280.7895812988281,
"logps/rejected": -239.1829833984375,
"loss": 0.584,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.054767437279224396,
"rewards/margins": 0.40931397676467896,
"rewards/rejected": -0.46408137679100037,
"step": 710
},
{
"epoch": 0.74,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": -2.4472877979278564,
"logits/rejected": -2.357172727584839,
"logps/chosen": -252.1331329345703,
"logps/rejected": -216.9487762451172,
"loss": 0.5796,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": -0.049075834453105927,
"rewards/margins": 0.3829793632030487,
"rewards/rejected": -0.43205517530441284,
"step": 720
},
{
"epoch": 0.75,
"learning_rate": 4.1599693838499805e-07,
"logits/chosen": -2.3492183685302734,
"logits/rejected": -2.34523606300354,
"logps/chosen": -248.2432403564453,
"logps/rejected": -214.99880981445312,
"loss": 0.5851,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.07765182107686996,
"rewards/margins": 0.3819560408592224,
"rewards/rejected": -0.45960789918899536,
"step": 730
},
{
"epoch": 0.76,
"learning_rate": 4.140834290088021e-07,
"logits/chosen": -2.3994088172912598,
"logits/rejected": -2.3783352375030518,
"logps/chosen": -246.6106719970703,
"logps/rejected": -206.70840454101562,
"loss": 0.5663,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": -0.04350767284631729,
"rewards/margins": 0.439382404088974,
"rewards/rejected": -0.4828900694847107,
"step": 740
},
{
"epoch": 0.77,
"learning_rate": 4.121699196326062e-07,
"logits/chosen": -2.3943963050842285,
"logits/rejected": -2.3858072757720947,
"logps/chosen": -266.1705627441406,
"logps/rejected": -225.35940551757812,
"loss": 0.5473,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.010749602690339088,
"rewards/margins": 0.48278599977493286,
"rewards/rejected": -0.49353551864624023,
"step": 750
},
{
"epoch": 0.78,
"learning_rate": 4.1025641025641024e-07,
"logits/chosen": -2.470837354660034,
"logits/rejected": -2.391634464263916,
"logps/chosen": -269.9073791503906,
"logps/rejected": -229.0169677734375,
"loss": 0.5639,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.05836876481771469,
"rewards/margins": 0.463728666305542,
"rewards/rejected": -0.5220974087715149,
"step": 760
},
{
"epoch": 0.8,
"learning_rate": 4.083429008802143e-07,
"logits/chosen": -2.399672746658325,
"logits/rejected": -2.386239528656006,
"logps/chosen": -265.0301513671875,
"logps/rejected": -216.77737426757812,
"loss": 0.5693,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.05191594362258911,
"rewards/margins": 0.4625419080257416,
"rewards/rejected": -0.5144578218460083,
"step": 770
},
{
"epoch": 0.81,
"learning_rate": 4.0642939150401836e-07,
"logits/chosen": -2.4455151557922363,
"logits/rejected": -2.3676414489746094,
"logps/chosen": -263.79571533203125,
"logps/rejected": -222.31787109375,
"loss": 0.5713,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.07387879490852356,
"rewards/margins": 0.4389980435371399,
"rewards/rejected": -0.5128768086433411,
"step": 780
},
{
"epoch": 0.82,
"learning_rate": 4.0451588212782237e-07,
"logits/chosen": -2.4102261066436768,
"logits/rejected": -2.353691577911377,
"logps/chosen": -277.6340026855469,
"logps/rejected": -241.7203826904297,
"loss": 0.5791,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.08135993033647537,
"rewards/margins": 0.44334641098976135,
"rewards/rejected": -0.524706244468689,
"step": 790
},
{
"epoch": 0.83,
"learning_rate": 4.0260237275162643e-07,
"logits/chosen": -2.430101156234741,
"logits/rejected": -2.385629177093506,
"logps/chosen": -267.8277587890625,
"logps/rejected": -237.6192169189453,
"loss": 0.5621,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.09289533644914627,
"rewards/margins": 0.47283419966697693,
"rewards/rejected": -0.5657294988632202,
"step": 800
},
{
"epoch": 0.84,
"learning_rate": 4.006888633754305e-07,
"logits/chosen": -2.424495220184326,
"logits/rejected": -2.3845698833465576,
"logps/chosen": -265.8463134765625,
"logps/rejected": -226.7728729248047,
"loss": 0.5831,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.05096619576215744,
"rewards/margins": 0.43721461296081543,
"rewards/rejected": -0.4881807863712311,
"step": 810
},
{
"epoch": 0.85,
"learning_rate": 3.9877535399923456e-07,
"logits/chosen": -2.4253883361816406,
"logits/rejected": -2.3850014209747314,
"logps/chosen": -272.5957946777344,
"logps/rejected": -233.60498046875,
"loss": 0.5633,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.06406211853027344,
"rewards/margins": 0.48712214827537537,
"rewards/rejected": -0.5511842370033264,
"step": 820
},
{
"epoch": 0.86,
"learning_rate": 3.968618446230386e-07,
"logits/chosen": -2.4437859058380127,
"logits/rejected": -2.3819785118103027,
"logps/chosen": -264.5028381347656,
"logps/rejected": -227.0218048095703,
"loss": 0.5708,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.06855222582817078,
"rewards/margins": 0.4592631459236145,
"rewards/rejected": -0.5278154015541077,
"step": 830
},
{
"epoch": 0.87,
"learning_rate": 3.949483352468427e-07,
"logits/chosen": -2.427250385284424,
"logits/rejected": -2.3620200157165527,
"logps/chosen": -254.1734161376953,
"logps/rejected": -229.9873046875,
"loss": 0.5823,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.0878441333770752,
"rewards/margins": 0.43157902359962463,
"rewards/rejected": -0.5194231271743774,
"step": 840
},
{
"epoch": 0.88,
"learning_rate": 3.9303482587064674e-07,
"logits/chosen": -2.4152088165283203,
"logits/rejected": -2.399456024169922,
"logps/chosen": -268.06689453125,
"logps/rejected": -232.3248748779297,
"loss": 0.5626,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.09431511908769608,
"rewards/margins": 0.4891575872898102,
"rewards/rejected": -0.5834725499153137,
"step": 850
},
{
"epoch": 0.89,
"learning_rate": 3.911213164944508e-07,
"logits/chosen": -2.461259126663208,
"logits/rejected": -2.4431066513061523,
"logps/chosen": -260.641845703125,
"logps/rejected": -233.93637084960938,
"loss": 0.5791,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.07183202356100082,
"rewards/margins": 0.44370943307876587,
"rewards/rejected": -0.5155414342880249,
"step": 860
},
{
"epoch": 0.9,
"learning_rate": 3.8920780711825487e-07,
"logits/chosen": -2.423548698425293,
"logits/rejected": -2.3617987632751465,
"logps/chosen": -264.8348083496094,
"logps/rejected": -234.61605834960938,
"loss": 0.5649,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.08685998618602753,
"rewards/margins": 0.47184914350509644,
"rewards/rejected": -0.5587090849876404,
"step": 870
},
{
"epoch": 0.91,
"learning_rate": 3.8729429774205893e-07,
"logits/chosen": -2.4239916801452637,
"logits/rejected": -2.3515267372131348,
"logps/chosen": -258.3001403808594,
"logps/rejected": -219.2425079345703,
"loss": 0.5832,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.12072154134511948,
"rewards/margins": 0.434969425201416,
"rewards/rejected": -0.5556910037994385,
"step": 880
},
{
"epoch": 0.92,
"learning_rate": 3.8538078836586294e-07,
"logits/chosen": -2.4307010173797607,
"logits/rejected": -2.3626708984375,
"logps/chosen": -283.5355224609375,
"logps/rejected": -235.6796417236328,
"loss": 0.5489,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.06075868755578995,
"rewards/margins": 0.5323190689086914,
"rewards/rejected": -0.5930777788162231,
"step": 890
},
{
"epoch": 0.93,
"learning_rate": 3.83467278989667e-07,
"logits/chosen": -2.445495128631592,
"logits/rejected": -2.368015766143799,
"logps/chosen": -273.6012878417969,
"logps/rejected": -237.4881134033203,
"loss": 0.5823,
"rewards/accuracies": 0.6796875,
"rewards/chosen": -0.09077002108097076,
"rewards/margins": 0.45550060272216797,
"rewards/rejected": -0.5462706685066223,
"step": 900
},
{
"epoch": 0.94,
"learning_rate": 3.8155376961347106e-07,
"logits/chosen": -2.431802988052368,
"logits/rejected": -2.3802406787872314,
"logps/chosen": -268.13336181640625,
"logps/rejected": -231.0006561279297,
"loss": 0.5636,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.08810480684041977,
"rewards/margins": 0.49154072999954224,
"rewards/rejected": -0.579645574092865,
"step": 910
},
{
"epoch": 0.95,
"learning_rate": 3.796402602372751e-07,
"logits/chosen": -2.4426496028900146,
"logits/rejected": -2.385349750518799,
"logps/chosen": -276.5380859375,
"logps/rejected": -233.1389617919922,
"loss": 0.5482,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.05111056566238403,
"rewards/margins": 0.542784571647644,
"rewards/rejected": -0.5938950777053833,
"step": 920
},
{
"epoch": 0.96,
"learning_rate": 3.777267508610792e-07,
"logits/chosen": -2.383472204208374,
"logits/rejected": -2.399059295654297,
"logps/chosen": -272.26556396484375,
"logps/rejected": -223.87905883789062,
"loss": 0.5578,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.09871871769428253,
"rewards/margins": 0.5212680101394653,
"rewards/rejected": -0.6199867129325867,
"step": 930
},
{
"epoch": 0.97,
"learning_rate": 3.7581324148488325e-07,
"logits/chosen": -2.4192233085632324,
"logits/rejected": -2.3954081535339355,
"logps/chosen": -273.0626525878906,
"logps/rejected": -239.1441192626953,
"loss": 0.5488,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.10023512691259384,
"rewards/margins": 0.5407330989837646,
"rewards/rejected": -0.6409682035446167,
"step": 940
},
{
"epoch": 0.98,
"learning_rate": 3.738997321086873e-07,
"logits/chosen": -2.406310558319092,
"logits/rejected": -2.383169651031494,
"logps/chosen": -268.0104064941406,
"logps/rejected": -233.89749145507812,
"loss": 0.57,
"rewards/accuracies": 0.6890624761581421,
"rewards/chosen": -0.09341312944889069,
"rewards/margins": 0.4971606135368347,
"rewards/rejected": -0.5905737280845642,
"step": 950
},
{
"epoch": 0.99,
"learning_rate": 3.7198622273249137e-07,
"logits/chosen": -2.4181647300720215,
"logits/rejected": -2.3776590824127197,
"logps/chosen": -284.4306640625,
"logps/rejected": -238.550537109375,
"loss": 0.5654,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.09716256707906723,
"rewards/margins": 0.506054699420929,
"rewards/rejected": -0.6032172441482544,
"step": 960
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.079043388366699,
"eval_logits/rejected": -2.0256688594818115,
"eval_logps/chosen": -265.5612487792969,
"eval_logps/rejected": -229.98611450195312,
"eval_loss": 0.5545315742492676,
"eval_rewards/accuracies": 0.7160000205039978,
"eval_rewards/chosen": -0.09934788197278976,
"eval_rewards/margins": 0.5339328050613403,
"eval_rewards/rejected": -0.6332806348800659,
"eval_runtime": 1088.7146,
"eval_samples_per_second": 1.837,
"eval_steps_per_second": 0.459,
"step": 968
},
{
"epoch": 1.0,
"learning_rate": 3.7007271335629544e-07,
"logits/chosen": -2.449903964996338,
"logits/rejected": -2.3904850482940674,
"logps/chosen": -269.0638732910156,
"logps/rejected": -230.3978271484375,
"loss": 0.5468,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.05560935288667679,
"rewards/margins": 0.5521260499954224,
"rewards/rejected": -0.6077354550361633,
"step": 970
},
{
"epoch": 1.01,
"learning_rate": 3.681592039800995e-07,
"logits/chosen": -2.4261183738708496,
"logits/rejected": -2.3550448417663574,
"logps/chosen": -267.64080810546875,
"logps/rejected": -227.04812622070312,
"loss": 0.545,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.09351503103971481,
"rewards/margins": 0.5480056405067444,
"rewards/rejected": -0.641520619392395,
"step": 980
},
{
"epoch": 1.02,
"learning_rate": 3.662456946039035e-07,
"logits/chosen": -2.4298439025878906,
"logits/rejected": -2.3898258209228516,
"logps/chosen": -277.6336364746094,
"logps/rejected": -225.4404296875,
"loss": 0.5438,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.08874578773975372,
"rewards/margins": 0.5600773096084595,
"rewards/rejected": -0.6488231420516968,
"step": 990
},
{
"epoch": 1.03,
"learning_rate": 3.6433218522770757e-07,
"logits/chosen": -2.440823793411255,
"logits/rejected": -2.3596456050872803,
"logps/chosen": -280.1471862792969,
"logps/rejected": -238.19503784179688,
"loss": 0.5523,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.09118635207414627,
"rewards/margins": 0.556471049785614,
"rewards/rejected": -0.6476574540138245,
"step": 1000
},
{
"epoch": 1.04,
"learning_rate": 3.6241867585151163e-07,
"logits/chosen": -2.4096405506134033,
"logits/rejected": -2.34090256690979,
"logps/chosen": -257.96527099609375,
"logps/rejected": -223.86474609375,
"loss": 0.5731,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.13044361770153046,
"rewards/margins": 0.49574214220046997,
"rewards/rejected": -0.6261857151985168,
"step": 1010
},
{
"epoch": 1.05,
"learning_rate": 3.605051664753157e-07,
"logits/chosen": -2.436314105987549,
"logits/rejected": -2.3611092567443848,
"logps/chosen": -272.37335205078125,
"logps/rejected": -231.5602264404297,
"loss": 0.5459,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.07726888358592987,
"rewards/margins": 0.5683926343917847,
"rewards/rejected": -0.645661473274231,
"step": 1020
},
{
"epoch": 1.06,
"learning_rate": 3.5859165709911975e-07,
"logits/chosen": -2.4350028038024902,
"logits/rejected": -2.3586974143981934,
"logps/chosen": -275.226806640625,
"logps/rejected": -223.6283721923828,
"loss": 0.5453,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.06087593361735344,
"rewards/margins": 0.5964738130569458,
"rewards/rejected": -0.6573497653007507,
"step": 1030
},
{
"epoch": 1.07,
"learning_rate": 3.566781477229238e-07,
"logits/chosen": -2.400864601135254,
"logits/rejected": -2.3652467727661133,
"logps/chosen": -255.86477661132812,
"logps/rejected": -222.24752807617188,
"loss": 0.5751,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.1341889202594757,
"rewards/margins": 0.5123754739761353,
"rewards/rejected": -0.6465644240379333,
"step": 1040
},
{
"epoch": 1.08,
"learning_rate": 3.547646383467279e-07,
"logits/chosen": -2.3898608684539795,
"logits/rejected": -2.379241466522217,
"logps/chosen": -261.6153869628906,
"logps/rejected": -223.2140655517578,
"loss": 0.5499,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.08567940443754196,
"rewards/margins": 0.5808093547821045,
"rewards/rejected": -0.666488766670227,
"step": 1050
},
{
"epoch": 1.09,
"learning_rate": 3.5285112897053194e-07,
"logits/chosen": -2.4234771728515625,
"logits/rejected": -2.4022397994995117,
"logps/chosen": -280.1412048339844,
"logps/rejected": -242.2364959716797,
"loss": 0.5684,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.11185096204280853,
"rewards/margins": 0.5072935223579407,
"rewards/rejected": -0.6191444993019104,
"step": 1060
},
{
"epoch": 1.11,
"learning_rate": 3.50937619594336e-07,
"logits/chosen": -2.4101145267486572,
"logits/rejected": -2.34965181350708,
"logps/chosen": -268.9992370605469,
"logps/rejected": -218.4785614013672,
"loss": 0.54,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.09902816265821457,
"rewards/margins": 0.6141443252563477,
"rewards/rejected": -0.7131724953651428,
"step": 1070
},
{
"epoch": 1.12,
"learning_rate": 3.4902411021814007e-07,
"logits/chosen": -2.4058384895324707,
"logits/rejected": -2.3813834190368652,
"logps/chosen": -261.517333984375,
"logps/rejected": -220.6446990966797,
"loss": 0.5651,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.11156700551509857,
"rewards/margins": 0.5487754940986633,
"rewards/rejected": -0.6603423357009888,
"step": 1080
},
{
"epoch": 1.13,
"learning_rate": 3.4711060084194413e-07,
"logits/chosen": -2.4069576263427734,
"logits/rejected": -2.3752903938293457,
"logps/chosen": -265.21124267578125,
"logps/rejected": -223.32421875,
"loss": 0.5423,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1168740764260292,
"rewards/margins": 0.5669043064117432,
"rewards/rejected": -0.6837784051895142,
"step": 1090
},
{
"epoch": 1.14,
"learning_rate": 3.4519709146574814e-07,
"logits/chosen": -2.4091246128082275,
"logits/rejected": -2.359158515930176,
"logps/chosen": -261.7292175292969,
"logps/rejected": -225.2208709716797,
"loss": 0.5656,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": -0.12864422798156738,
"rewards/margins": 0.5372087955474854,
"rewards/rejected": -0.6658530831336975,
"step": 1100
},
{
"epoch": 1.15,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": -2.466919183731079,
"logits/rejected": -2.3888449668884277,
"logps/chosen": -277.5549011230469,
"logps/rejected": -240.7705841064453,
"loss": 0.5392,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.05221106857061386,
"rewards/margins": 0.6037675738334656,
"rewards/rejected": -0.6559786796569824,
"step": 1110
},
{
"epoch": 1.16,
"learning_rate": 3.4137007271335626e-07,
"logits/chosen": -2.4204328060150146,
"logits/rejected": -2.3684065341949463,
"logps/chosen": -269.5735778808594,
"logps/rejected": -219.9510498046875,
"loss": 0.5382,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.09449413418769836,
"rewards/margins": 0.6241403818130493,
"rewards/rejected": -0.7186344861984253,
"step": 1120
},
{
"epoch": 1.17,
"learning_rate": 3.394565633371603e-07,
"logits/chosen": -2.431792736053467,
"logits/rejected": -2.3539392948150635,
"logps/chosen": -279.9765930175781,
"logps/rejected": -239.1553955078125,
"loss": 0.5447,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.07704336196184158,
"rewards/margins": 0.6232292652130127,
"rewards/rejected": -0.7002726197242737,
"step": 1130
},
{
"epoch": 1.18,
"learning_rate": 3.375430539609644e-07,
"logits/chosen": -2.404470920562744,
"logits/rejected": -2.3776755332946777,
"logps/chosen": -256.79559326171875,
"logps/rejected": -227.1933135986328,
"loss": 0.5584,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.11403951793909073,
"rewards/margins": 0.545345664024353,
"rewards/rejected": -0.6593851447105408,
"step": 1140
},
{
"epoch": 1.19,
"learning_rate": 3.3562954458476845e-07,
"logits/chosen": -2.357815980911255,
"logits/rejected": -2.331373691558838,
"logps/chosen": -253.86587524414062,
"logps/rejected": -217.3060760498047,
"loss": 0.5472,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.10227999836206436,
"rewards/margins": 0.5686275362968445,
"rewards/rejected": -0.6709075570106506,
"step": 1150
},
{
"epoch": 1.2,
"learning_rate": 3.337160352085725e-07,
"logits/chosen": -2.409895420074463,
"logits/rejected": -2.3179931640625,
"logps/chosen": -266.31640625,
"logps/rejected": -218.92160034179688,
"loss": 0.5432,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.10928237438201904,
"rewards/margins": 0.6051470041275024,
"rewards/rejected": -0.7144292593002319,
"step": 1160
},
{
"epoch": 1.21,
"learning_rate": 3.3180252583237657e-07,
"logits/chosen": -2.4335556030273438,
"logits/rejected": -2.3714287281036377,
"logps/chosen": -277.588623046875,
"logps/rejected": -233.22079467773438,
"loss": 0.5461,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.14634881913661957,
"rewards/margins": 0.5770747661590576,
"rewards/rejected": -0.7234236001968384,
"step": 1170
},
{
"epoch": 1.22,
"learning_rate": 3.2988901645618063e-07,
"logits/chosen": -2.4803996086120605,
"logits/rejected": -2.409782886505127,
"logps/chosen": -267.01678466796875,
"logps/rejected": -247.248291015625,
"loss": 0.5607,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.1365794688463211,
"rewards/margins": 0.5790367126464844,
"rewards/rejected": -0.7156162261962891,
"step": 1180
},
{
"epoch": 1.23,
"learning_rate": 3.279755070799847e-07,
"logits/chosen": -2.3643290996551514,
"logits/rejected": -2.3453285694122314,
"logps/chosen": -260.19134521484375,
"logps/rejected": -226.71481323242188,
"loss": 0.5624,
"rewards/accuracies": 0.698437511920929,
"rewards/chosen": -0.1081305742263794,
"rewards/margins": 0.571107029914856,
"rewards/rejected": -0.6792376637458801,
"step": 1190
},
{
"epoch": 1.24,
"learning_rate": 3.260619977037887e-07,
"logits/chosen": -2.3916163444519043,
"logits/rejected": -2.358982563018799,
"logps/chosen": -260.97052001953125,
"logps/rejected": -222.0037078857422,
"loss": 0.5276,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.10513798892498016,
"rewards/margins": 0.6672986149787903,
"rewards/rejected": -0.7724366188049316,
"step": 1200
},
{
"epoch": 1.25,
"learning_rate": 3.2414848832759277e-07,
"logits/chosen": -2.3794853687286377,
"logits/rejected": -2.3386852741241455,
"logps/chosen": -257.59130859375,
"logps/rejected": -222.3406219482422,
"loss": 0.5545,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": -0.13189749419689178,
"rewards/margins": 0.5676501393318176,
"rewards/rejected": -0.6995476484298706,
"step": 1210
},
{
"epoch": 1.26,
"learning_rate": 3.2223497895139683e-07,
"logits/chosen": -2.384241819381714,
"logits/rejected": -2.3438777923583984,
"logps/chosen": -272.30767822265625,
"logps/rejected": -231.92471313476562,
"loss": 0.5362,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1082894578576088,
"rewards/margins": 0.6236446499824524,
"rewards/rejected": -0.731934130191803,
"step": 1220
},
{
"epoch": 1.27,
"learning_rate": 3.203214695752009e-07,
"logits/chosen": -2.4498887062072754,
"logits/rejected": -2.382390260696411,
"logps/chosen": -270.11798095703125,
"logps/rejected": -228.3955841064453,
"loss": 0.5393,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.13679789006710052,
"rewards/margins": 0.6304437518119812,
"rewards/rejected": -0.7672415971755981,
"step": 1230
},
{
"epoch": 1.28,
"learning_rate": 3.1840796019900495e-07,
"logits/chosen": -2.3304078578948975,
"logits/rejected": -2.328829288482666,
"logps/chosen": -244.64791870117188,
"logps/rejected": -224.4540252685547,
"loss": 0.5672,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.1754181832075119,
"rewards/margins": 0.5484617948532104,
"rewards/rejected": -0.7238799333572388,
"step": 1240
},
{
"epoch": 1.29,
"learning_rate": 3.16494450822809e-07,
"logits/chosen": -2.4004368782043457,
"logits/rejected": -2.394761562347412,
"logps/chosen": -260.854248046875,
"logps/rejected": -217.267333984375,
"loss": 0.5264,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.09097670018672943,
"rewards/margins": 0.6562029123306274,
"rewards/rejected": -0.7471795678138733,
"step": 1250
},
{
"epoch": 1.3,
"learning_rate": 3.145809414466131e-07,
"logits/chosen": -2.420809268951416,
"logits/rejected": -2.393630266189575,
"logps/chosen": -268.07220458984375,
"logps/rejected": -243.11996459960938,
"loss": 0.5434,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.12649384140968323,
"rewards/margins": 0.5802772641181946,
"rewards/rejected": -0.706771194934845,
"step": 1260
},
{
"epoch": 1.31,
"learning_rate": 3.1266743207041714e-07,
"logits/chosen": -2.4304604530334473,
"logits/rejected": -2.3626341819763184,
"logps/chosen": -274.98638916015625,
"logps/rejected": -246.15872192382812,
"loss": 0.5499,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.07647743821144104,
"rewards/margins": 0.5869981646537781,
"rewards/rejected": -0.6634755730628967,
"step": 1270
},
{
"epoch": 1.32,
"learning_rate": 3.107539226942212e-07,
"logits/chosen": -2.392775774002075,
"logits/rejected": -2.3546760082244873,
"logps/chosen": -280.3741149902344,
"logps/rejected": -248.8837127685547,
"loss": 0.5092,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": -0.0769033133983612,
"rewards/margins": 0.693490743637085,
"rewards/rejected": -0.7703940868377686,
"step": 1280
},
{
"epoch": 1.33,
"learning_rate": 3.0884041331802526e-07,
"logits/chosen": -2.3941831588745117,
"logits/rejected": -2.349119186401367,
"logps/chosen": -275.7878112792969,
"logps/rejected": -238.73684692382812,
"loss": 0.5188,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.11901184171438217,
"rewards/margins": 0.6922268867492676,
"rewards/rejected": -0.8112386465072632,
"step": 1290
},
{
"epoch": 1.34,
"learning_rate": 3.0692690394182927e-07,
"logits/chosen": -2.3558011054992676,
"logits/rejected": -2.365652322769165,
"logps/chosen": -254.7240447998047,
"logps/rejected": -225.5684051513672,
"loss": 0.5479,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": -0.11073043197393417,
"rewards/margins": 0.6586212515830994,
"rewards/rejected": -0.7693516612052917,
"step": 1300
},
{
"epoch": 1.35,
"learning_rate": 3.0501339456563334e-07,
"logits/chosen": -2.400010108947754,
"logits/rejected": -2.3430371284484863,
"logps/chosen": -282.26483154296875,
"logps/rejected": -241.29495239257812,
"loss": 0.553,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.1404508799314499,
"rewards/margins": 0.5930649042129517,
"rewards/rejected": -0.7335157990455627,
"step": 1310
},
{
"epoch": 1.36,
"learning_rate": 3.030998851894374e-07,
"logits/chosen": -2.429117202758789,
"logits/rejected": -2.380638360977173,
"logps/chosen": -277.0819396972656,
"logps/rejected": -231.4957733154297,
"loss": 0.5398,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.10495986044406891,
"rewards/margins": 0.6354261040687561,
"rewards/rejected": -0.740385890007019,
"step": 1320
},
{
"epoch": 1.37,
"learning_rate": 3.0118637581324146e-07,
"logits/chosen": -2.3560943603515625,
"logits/rejected": -2.299285650253296,
"logps/chosen": -283.2480773925781,
"logps/rejected": -236.7747802734375,
"loss": 0.5339,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.15119323134422302,
"rewards/margins": 0.6373868584632874,
"rewards/rejected": -0.788580060005188,
"step": 1330
},
{
"epoch": 1.38,
"learning_rate": 2.992728664370455e-07,
"logits/chosen": -2.4186995029449463,
"logits/rejected": -2.403923511505127,
"logps/chosen": -265.8408203125,
"logps/rejected": -224.98312377929688,
"loss": 0.5481,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1380973756313324,
"rewards/margins": 0.6355406045913696,
"rewards/rejected": -0.7736380100250244,
"step": 1340
},
{
"epoch": 1.39,
"learning_rate": 2.973593570608496e-07,
"logits/chosen": -2.3618245124816895,
"logits/rejected": -2.340223550796509,
"logps/chosen": -267.28338623046875,
"logps/rejected": -229.21469116210938,
"loss": 0.5572,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.126164972782135,
"rewards/margins": 0.6094905138015747,
"rewards/rejected": -0.7356554865837097,
"step": 1350
},
{
"epoch": 1.4,
"learning_rate": 2.9544584768465365e-07,
"logits/chosen": -2.4243083000183105,
"logits/rejected": -2.398084878921509,
"logps/chosen": -256.0418395996094,
"logps/rejected": -234.45346069335938,
"loss": 0.5438,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.10923846065998077,
"rewards/margins": 0.6375387907028198,
"rewards/rejected": -0.7467772364616394,
"step": 1360
},
{
"epoch": 1.41,
"learning_rate": 2.935323383084577e-07,
"logits/chosen": -2.448951005935669,
"logits/rejected": -2.3950791358947754,
"logps/chosen": -276.74725341796875,
"logps/rejected": -226.75149536132812,
"loss": 0.5584,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": -0.17273911833763123,
"rewards/margins": 0.5847989916801453,
"rewards/rejected": -0.7575381994247437,
"step": 1370
},
{
"epoch": 1.43,
"learning_rate": 2.9161882893226177e-07,
"logits/chosen": -2.427473545074463,
"logits/rejected": -2.3801541328430176,
"logps/chosen": -267.68463134765625,
"logps/rejected": -228.783447265625,
"loss": 0.5465,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.1168685331940651,
"rewards/margins": 0.6201252937316895,
"rewards/rejected": -0.7369938492774963,
"step": 1380
},
{
"epoch": 1.44,
"learning_rate": 2.8970531955606583e-07,
"logits/chosen": -2.4376165866851807,
"logits/rejected": -2.39223051071167,
"logps/chosen": -269.39691162109375,
"logps/rejected": -226.72702026367188,
"loss": 0.5143,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.10355620086193085,
"rewards/margins": 0.7153445482254028,
"rewards/rejected": -0.8189007639884949,
"step": 1390
},
{
"epoch": 1.45,
"learning_rate": 2.8779181017986984e-07,
"logits/chosen": -2.3847761154174805,
"logits/rejected": -2.358484983444214,
"logps/chosen": -265.6216125488281,
"logps/rejected": -226.9099578857422,
"loss": 0.5786,
"rewards/accuracies": 0.6859375238418579,
"rewards/chosen": -0.1487416923046112,
"rewards/margins": 0.582770049571991,
"rewards/rejected": -0.7315118312835693,
"step": 1400
},
{
"epoch": 1.46,
"learning_rate": 2.858783008036739e-07,
"logits/chosen": -2.441329002380371,
"logits/rejected": -2.405198335647583,
"logps/chosen": -254.2424774169922,
"logps/rejected": -216.15487670898438,
"loss": 0.5328,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.10592655837535858,
"rewards/margins": 0.6519125699996948,
"rewards/rejected": -0.7578392624855042,
"step": 1410
},
{
"epoch": 1.47,
"learning_rate": 2.8396479142747797e-07,
"logits/chosen": -2.353024482727051,
"logits/rejected": -2.3756861686706543,
"logps/chosen": -271.5851135253906,
"logps/rejected": -226.3388214111328,
"loss": 0.5633,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.11950352042913437,
"rewards/margins": 0.5914410948753357,
"rewards/rejected": -0.710944652557373,
"step": 1420
},
{
"epoch": 1.48,
"learning_rate": 2.8205128205128203e-07,
"logits/chosen": -2.4454894065856934,
"logits/rejected": -2.4075827598571777,
"logps/chosen": -278.7067565917969,
"logps/rejected": -233.1806182861328,
"loss": 0.5468,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.1553649604320526,
"rewards/margins": 0.6472987532615662,
"rewards/rejected": -0.8026638031005859,
"step": 1430
},
{
"epoch": 1.49,
"learning_rate": 2.801377726750861e-07,
"logits/chosen": -2.41646671295166,
"logits/rejected": -2.380006790161133,
"logps/chosen": -259.50830078125,
"logps/rejected": -234.66000366210938,
"loss": 0.5442,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.11458615958690643,
"rewards/margins": 0.6320740580558777,
"rewards/rejected": -0.7466602325439453,
"step": 1440
},
{
"epoch": 1.5,
"learning_rate": 2.7822426329889015e-07,
"logits/chosen": -2.3705825805664062,
"logits/rejected": -2.3389930725097656,
"logps/chosen": -264.9784240722656,
"logps/rejected": -235.85598754882812,
"loss": 0.5239,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.11364629119634628,
"rewards/margins": 0.6920466423034668,
"rewards/rejected": -0.8056928515434265,
"step": 1450
},
{
"epoch": 1.51,
"learning_rate": 2.763107539226942e-07,
"logits/chosen": -2.3917994499206543,
"logits/rejected": -2.361053705215454,
"logps/chosen": -273.13323974609375,
"logps/rejected": -237.2021026611328,
"loss": 0.5537,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.1213529109954834,
"rewards/margins": 0.6237030625343323,
"rewards/rejected": -0.7450559735298157,
"step": 1460
},
{
"epoch": 1.52,
"learning_rate": 2.743972445464983e-07,
"logits/chosen": -2.3829543590545654,
"logits/rejected": -2.3598859310150146,
"logps/chosen": -256.2921447753906,
"logps/rejected": -218.822998046875,
"loss": 0.5447,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.09320759773254395,
"rewards/margins": 0.628312349319458,
"rewards/rejected": -0.721519947052002,
"step": 1470
},
{
"epoch": 1.53,
"learning_rate": 2.7248373517030234e-07,
"logits/chosen": -2.3986709117889404,
"logits/rejected": -2.3675730228424072,
"logps/chosen": -261.54193115234375,
"logps/rejected": -222.29812622070312,
"loss": 0.5696,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.17618440091609955,
"rewards/margins": 0.6171834468841553,
"rewards/rejected": -0.793367862701416,
"step": 1480
},
{
"epoch": 1.54,
"learning_rate": 2.705702257941064e-07,
"logits/chosen": -2.403079032897949,
"logits/rejected": -2.344881057739258,
"logps/chosen": -271.6820068359375,
"logps/rejected": -223.48422241210938,
"loss": 0.5317,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.13215352594852448,
"rewards/margins": 0.6836920976638794,
"rewards/rejected": -0.8158456683158875,
"step": 1490
},
{
"epoch": 1.55,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": -2.4172019958496094,
"logits/rejected": -2.350555181503296,
"logps/chosen": -261.85516357421875,
"logps/rejected": -225.0038604736328,
"loss": 0.5255,
"rewards/accuracies": 0.7578125,
"rewards/chosen": -0.08901546150445938,
"rewards/margins": 0.6642698049545288,
"rewards/rejected": -0.75328528881073,
"step": 1500
},
{
"epoch": 1.56,
"learning_rate": 2.6674320704171447e-07,
"logits/chosen": -2.4008395671844482,
"logits/rejected": -2.351348876953125,
"logps/chosen": -267.31951904296875,
"logps/rejected": -227.8149871826172,
"loss": 0.5381,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.13619837164878845,
"rewards/margins": 0.6516298055648804,
"rewards/rejected": -0.7878280878067017,
"step": 1510
},
{
"epoch": 1.57,
"learning_rate": 2.6482969766551853e-07,
"logits/chosen": -2.348276138305664,
"logits/rejected": -2.329331159591675,
"logps/chosen": -263.51080322265625,
"logps/rejected": -227.07809448242188,
"loss": 0.5122,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.10638642311096191,
"rewards/margins": 0.7151543498039246,
"rewards/rejected": -0.8215408325195312,
"step": 1520
},
{
"epoch": 1.58,
"learning_rate": 2.629161882893226e-07,
"logits/chosen": -2.4366257190704346,
"logits/rejected": -2.379861354827881,
"logps/chosen": -279.295166015625,
"logps/rejected": -239.08352661132812,
"loss": 0.5645,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.15942886471748352,
"rewards/margins": 0.6090508103370667,
"rewards/rejected": -0.7684796452522278,
"step": 1530
},
{
"epoch": 1.59,
"learning_rate": 2.6100267891312666e-07,
"logits/chosen": -2.364650249481201,
"logits/rejected": -2.3203299045562744,
"logps/chosen": -261.32708740234375,
"logps/rejected": -223.8793487548828,
"loss": 0.5457,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.13680413365364075,
"rewards/margins": 0.6616954207420349,
"rewards/rejected": -0.798499584197998,
"step": 1540
},
{
"epoch": 1.6,
"learning_rate": 2.590891695369307e-07,
"logits/chosen": -2.414820432662964,
"logits/rejected": -2.3798413276672363,
"logps/chosen": -281.36065673828125,
"logps/rejected": -240.29238891601562,
"loss": 0.5368,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.08666771650314331,
"rewards/margins": 0.6870118975639343,
"rewards/rejected": -0.7736796140670776,
"step": 1550
},
{
"epoch": 1.61,
"learning_rate": 2.571756601607348e-07,
"logits/chosen": -2.407886266708374,
"logits/rejected": -2.3671507835388184,
"logps/chosen": -281.9557189941406,
"logps/rejected": -232.68588256835938,
"loss": 0.5316,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.127783864736557,
"rewards/margins": 0.7107834219932556,
"rewards/rejected": -0.8385672569274902,
"step": 1560
},
{
"epoch": 1.62,
"learning_rate": 2.5526215078453884e-07,
"logits/chosen": -2.37595796585083,
"logits/rejected": -2.3402533531188965,
"logps/chosen": -275.40106201171875,
"logps/rejected": -241.32421875,
"loss": 0.5485,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.13988900184631348,
"rewards/margins": 0.6486446261405945,
"rewards/rejected": -0.788533627986908,
"step": 1570
},
{
"epoch": 1.63,
"learning_rate": 2.533486414083429e-07,
"logits/chosen": -2.383958578109741,
"logits/rejected": -2.3686203956604004,
"logps/chosen": -273.81549072265625,
"logps/rejected": -226.5820770263672,
"loss": 0.5231,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.16685205698013306,
"rewards/margins": 0.6781736016273499,
"rewards/rejected": -0.8450256586074829,
"step": 1580
},
{
"epoch": 1.64,
"learning_rate": 2.5143513203214697e-07,
"logits/chosen": -2.4135966300964355,
"logits/rejected": -2.339186429977417,
"logps/chosen": -261.7090759277344,
"logps/rejected": -232.47018432617188,
"loss": 0.5607,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.18446998298168182,
"rewards/margins": 0.6322883367538452,
"rewards/rejected": -0.8167583346366882,
"step": 1590
},
{
"epoch": 1.65,
"learning_rate": 2.49521622655951e-07,
"logits/chosen": -2.4073891639709473,
"logits/rejected": -2.3973593711853027,
"logps/chosen": -263.8055725097656,
"logps/rejected": -233.00167846679688,
"loss": 0.5567,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.12571503221988678,
"rewards/margins": 0.6185272932052612,
"rewards/rejected": -0.7442423701286316,
"step": 1600
},
{
"epoch": 1.66,
"learning_rate": 2.4760811327975504e-07,
"logits/chosen": -2.3958241939544678,
"logits/rejected": -2.356121063232422,
"logps/chosen": -276.14556884765625,
"logps/rejected": -239.56112670898438,
"loss": 0.5407,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.11243724822998047,
"rewards/margins": 0.6681596040725708,
"rewards/rejected": -0.7805968523025513,
"step": 1610
},
{
"epoch": 1.67,
"learning_rate": 2.456946039035591e-07,
"logits/chosen": -2.387842893600464,
"logits/rejected": -2.3812038898468018,
"logps/chosen": -256.08905029296875,
"logps/rejected": -216.9521484375,
"loss": 0.5135,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.11949145793914795,
"rewards/margins": 0.7178138494491577,
"rewards/rejected": -0.8373053669929504,
"step": 1620
},
{
"epoch": 1.68,
"learning_rate": 2.4378109452736316e-07,
"logits/chosen": -2.3569884300231934,
"logits/rejected": -2.3548595905303955,
"logps/chosen": -268.0955810546875,
"logps/rejected": -234.91317749023438,
"loss": 0.5402,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.1568536013364792,
"rewards/margins": 0.656032145023346,
"rewards/rejected": -0.8128856420516968,
"step": 1630
},
{
"epoch": 1.69,
"learning_rate": 2.418675851511672e-07,
"logits/chosen": -2.394106388092041,
"logits/rejected": -2.338951587677002,
"logps/chosen": -258.17071533203125,
"logps/rejected": -227.0476531982422,
"loss": 0.5266,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.18490514159202576,
"rewards/margins": 0.674010157585144,
"rewards/rejected": -0.8589152097702026,
"step": 1640
},
{
"epoch": 1.7,
"learning_rate": 2.399540757749713e-07,
"logits/chosen": -2.4031760692596436,
"logits/rejected": -2.371420383453369,
"logps/chosen": -252.31594848632812,
"logps/rejected": -225.9946746826172,
"loss": 0.5485,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.11656501144170761,
"rewards/margins": 0.6458471417427063,
"rewards/rejected": -0.7624121308326721,
"step": 1650
},
{
"epoch": 1.71,
"learning_rate": 2.3804056639877535e-07,
"logits/chosen": -2.3602213859558105,
"logits/rejected": -2.3286445140838623,
"logps/chosen": -269.58294677734375,
"logps/rejected": -239.6148681640625,
"loss": 0.5509,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.10988609492778778,
"rewards/margins": 0.6352638006210327,
"rewards/rejected": -0.7451499700546265,
"step": 1660
},
{
"epoch": 1.72,
"learning_rate": 2.361270570225794e-07,
"logits/chosen": -2.3772807121276855,
"logits/rejected": -2.3392374515533447,
"logps/chosen": -273.1993713378906,
"logps/rejected": -226.94155883789062,
"loss": 0.5544,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.15547646582126617,
"rewards/margins": 0.6606963872909546,
"rewards/rejected": -0.8161728978157043,
"step": 1670
},
{
"epoch": 1.74,
"learning_rate": 2.3421354764638345e-07,
"logits/chosen": -2.339207172393799,
"logits/rejected": -2.2881035804748535,
"logps/chosen": -262.3006896972656,
"logps/rejected": -222.82565307617188,
"loss": 0.5392,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.1391618549823761,
"rewards/margins": 0.6765463948249817,
"rewards/rejected": -0.8157082796096802,
"step": 1680
},
{
"epoch": 1.75,
"learning_rate": 2.323000382701875e-07,
"logits/chosen": -2.36671781539917,
"logits/rejected": -2.30442476272583,
"logps/chosen": -262.6791076660156,
"logps/rejected": -223.51834106445312,
"loss": 0.5352,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.1453666090965271,
"rewards/margins": 0.6910194754600525,
"rewards/rejected": -0.8363860845565796,
"step": 1690
},
{
"epoch": 1.76,
"learning_rate": 2.3038652889399157e-07,
"logits/chosen": -2.377718448638916,
"logits/rejected": -2.3732407093048096,
"logps/chosen": -264.3118591308594,
"logps/rejected": -235.5894775390625,
"loss": 0.5258,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.12053600698709488,
"rewards/margins": 0.6903436183929443,
"rewards/rejected": -0.8108797073364258,
"step": 1700
},
{
"epoch": 1.77,
"learning_rate": 2.2847301951779563e-07,
"logits/chosen": -2.4043807983398438,
"logits/rejected": -2.3661141395568848,
"logps/chosen": -273.5931701660156,
"logps/rejected": -232.48287963867188,
"loss": 0.535,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.140711709856987,
"rewards/margins": 0.7188085317611694,
"rewards/rejected": -0.8595201373100281,
"step": 1710
},
{
"epoch": 1.78,
"learning_rate": 2.265595101415997e-07,
"logits/chosen": -2.391242265701294,
"logits/rejected": -2.33647084236145,
"logps/chosen": -269.0169372558594,
"logps/rejected": -230.73583984375,
"loss": 0.5443,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.14084765315055847,
"rewards/margins": 0.6776271462440491,
"rewards/rejected": -0.8184748888015747,
"step": 1720
},
{
"epoch": 1.79,
"learning_rate": 2.2464600076540373e-07,
"logits/chosen": -2.405012845993042,
"logits/rejected": -2.3291537761688232,
"logps/chosen": -279.62371826171875,
"logps/rejected": -237.05722045898438,
"loss": 0.554,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.10567928850650787,
"rewards/margins": 0.6449233293533325,
"rewards/rejected": -0.750602662563324,
"step": 1730
},
{
"epoch": 1.8,
"learning_rate": 2.227324913892078e-07,
"logits/chosen": -2.3809354305267334,
"logits/rejected": -2.341770648956299,
"logps/chosen": -272.91741943359375,
"logps/rejected": -228.8494873046875,
"loss": 0.547,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.1115594357252121,
"rewards/margins": 0.6728307604789734,
"rewards/rejected": -0.7843901515007019,
"step": 1740
},
{
"epoch": 1.81,
"learning_rate": 2.2081898201301186e-07,
"logits/chosen": -2.344855546951294,
"logits/rejected": -2.347912549972534,
"logps/chosen": -284.1566162109375,
"logps/rejected": -242.9143524169922,
"loss": 0.559,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.12913444638252258,
"rewards/margins": 0.6051042675971985,
"rewards/rejected": -0.7342387437820435,
"step": 1750
},
{
"epoch": 1.82,
"learning_rate": 2.1890547263681592e-07,
"logits/chosen": -2.3368725776672363,
"logits/rejected": -2.3267197608947754,
"logps/chosen": -279.5101623535156,
"logps/rejected": -228.0315399169922,
"loss": 0.5412,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.123216912150383,
"rewards/margins": 0.6950885653495789,
"rewards/rejected": -0.818305492401123,
"step": 1760
},
{
"epoch": 1.83,
"learning_rate": 2.1699196326061998e-07,
"logits/chosen": -2.334354877471924,
"logits/rejected": -2.3555445671081543,
"logps/chosen": -272.8717956542969,
"logps/rejected": -230.3594207763672,
"loss": 0.5313,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.12218773365020752,
"rewards/margins": 0.7177630662918091,
"rewards/rejected": -0.8399508595466614,
"step": 1770
},
{
"epoch": 1.84,
"learning_rate": 2.1507845388442402e-07,
"logits/chosen": -2.4097609519958496,
"logits/rejected": -2.3510959148406982,
"logps/chosen": -276.52862548828125,
"logps/rejected": -218.99441528320312,
"loss": 0.518,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.06975328177213669,
"rewards/margins": 0.6969150304794312,
"rewards/rejected": -0.7666682600975037,
"step": 1780
},
{
"epoch": 1.85,
"learning_rate": 2.1316494450822808e-07,
"logits/chosen": -2.3507437705993652,
"logits/rejected": -2.3511948585510254,
"logps/chosen": -271.771240234375,
"logps/rejected": -231.90634155273438,
"loss": 0.5264,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.12042073160409927,
"rewards/margins": 0.7034494876861572,
"rewards/rejected": -0.8238701820373535,
"step": 1790
},
{
"epoch": 1.86,
"learning_rate": 2.1125143513203214e-07,
"logits/chosen": -2.4258570671081543,
"logits/rejected": -2.4029757976531982,
"logps/chosen": -264.3330078125,
"logps/rejected": -227.8314208984375,
"loss": 0.5476,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.10862596333026886,
"rewards/margins": 0.6536161303520203,
"rewards/rejected": -0.7622420787811279,
"step": 1800
},
{
"epoch": 1.87,
"learning_rate": 2.093379257558362e-07,
"logits/chosen": -2.4013142585754395,
"logits/rejected": -2.34897518157959,
"logps/chosen": -271.6585693359375,
"logps/rejected": -241.2907257080078,
"loss": 0.5611,
"rewards/accuracies": 0.698437511920929,
"rewards/chosen": -0.11902491748332977,
"rewards/margins": 0.6104603409767151,
"rewards/rejected": -0.7294851541519165,
"step": 1810
},
{
"epoch": 1.88,
"learning_rate": 2.0742441637964026e-07,
"logits/chosen": -2.386214256286621,
"logits/rejected": -2.33040452003479,
"logps/chosen": -245.88143920898438,
"logps/rejected": -216.9251251220703,
"loss": 0.5401,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.14682015776634216,
"rewards/margins": 0.6507130861282349,
"rewards/rejected": -0.7975332736968994,
"step": 1820
},
{
"epoch": 1.89,
"learning_rate": 2.055109070034443e-07,
"logits/chosen": -2.4217278957366943,
"logits/rejected": -2.3312575817108154,
"logps/chosen": -260.94085693359375,
"logps/rejected": -222.13607788085938,
"loss": 0.5446,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.11108909547328949,
"rewards/margins": 0.6555716395378113,
"rewards/rejected": -0.7666608095169067,
"step": 1830
},
{
"epoch": 1.9,
"learning_rate": 2.0359739762724836e-07,
"logits/chosen": -2.406583309173584,
"logits/rejected": -2.3424503803253174,
"logps/chosen": -289.1400146484375,
"logps/rejected": -241.73513793945312,
"loss": 0.4941,
"rewards/accuracies": 0.7671874761581421,
"rewards/chosen": -0.09672559797763824,
"rewards/margins": 0.7783478498458862,
"rewards/rejected": -0.8750733137130737,
"step": 1840
},
{
"epoch": 1.91,
"learning_rate": 2.0168388825105242e-07,
"logits/chosen": -2.405856132507324,
"logits/rejected": -2.350475311279297,
"logps/chosen": -261.40814208984375,
"logps/rejected": -229.8692169189453,
"loss": 0.5521,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.15710802376270294,
"rewards/margins": 0.6592746376991272,
"rewards/rejected": -0.8163825869560242,
"step": 1850
},
{
"epoch": 1.92,
"learning_rate": 1.997703788748565e-07,
"logits/chosen": -2.3927724361419678,
"logits/rejected": -2.332962989807129,
"logps/chosen": -261.10699462890625,
"logps/rejected": -237.5717010498047,
"loss": 0.5378,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.1337103396654129,
"rewards/margins": 0.6999514102935791,
"rewards/rejected": -0.8336617350578308,
"step": 1860
},
{
"epoch": 1.93,
"learning_rate": 1.9785686949866055e-07,
"logits/chosen": -2.4174818992614746,
"logits/rejected": -2.361926317214966,
"logps/chosen": -275.9540710449219,
"logps/rejected": -229.05615234375,
"loss": 0.5198,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.1561015248298645,
"rewards/margins": 0.7133805155754089,
"rewards/rejected": -0.8694820404052734,
"step": 1870
},
{
"epoch": 1.94,
"learning_rate": 1.9594336012246458e-07,
"logits/chosen": -2.373378276824951,
"logits/rejected": -2.3580093383789062,
"logps/chosen": -263.26739501953125,
"logps/rejected": -229.62686157226562,
"loss": 0.5396,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.15732435882091522,
"rewards/margins": 0.6500160098075867,
"rewards/rejected": -0.8073404431343079,
"step": 1880
},
{
"epoch": 1.95,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": -2.3866982460021973,
"logits/rejected": -2.3246593475341797,
"logps/chosen": -261.379150390625,
"logps/rejected": -227.70016479492188,
"loss": 0.5263,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.117561474442482,
"rewards/margins": 0.7048689723014832,
"rewards/rejected": -0.8224304914474487,
"step": 1890
},
{
"epoch": 1.96,
"learning_rate": 1.921163413700727e-07,
"logits/chosen": -2.4001078605651855,
"logits/rejected": -2.3805463314056396,
"logps/chosen": -281.5653381347656,
"logps/rejected": -243.9423828125,
"loss": 0.5362,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.10007290542125702,
"rewards/margins": 0.7120274305343628,
"rewards/rejected": -0.8121002316474915,
"step": 1900
},
{
"epoch": 1.97,
"learning_rate": 1.9020283199387677e-07,
"logits/chosen": -2.353015899658203,
"logits/rejected": -2.3475286960601807,
"logps/chosen": -268.6228942871094,
"logps/rejected": -238.2252197265625,
"loss": 0.5172,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.11127477884292603,
"rewards/margins": 0.7399830222129822,
"rewards/rejected": -0.8512576818466187,
"step": 1910
},
{
"epoch": 1.98,
"learning_rate": 1.8828932261768083e-07,
"logits/chosen": -2.3818321228027344,
"logits/rejected": -2.3469431400299072,
"logps/chosen": -265.0734558105469,
"logps/rejected": -227.3889617919922,
"loss": 0.5273,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.12172959744930267,
"rewards/margins": 0.716955304145813,
"rewards/rejected": -0.8386849164962769,
"step": 1920
},
{
"epoch": 1.99,
"learning_rate": 1.8637581324148487e-07,
"logits/chosen": -2.401777744293213,
"logits/rejected": -2.3709285259246826,
"logps/chosen": -268.879638671875,
"logps/rejected": -239.0655517578125,
"loss": 0.5587,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": -0.1592234969139099,
"rewards/margins": 0.6302945017814636,
"rewards/rejected": -0.7895179986953735,
"step": 1930
},
{
"epoch": 2.0,
"eval_logits/chosen": -2.042747735977173,
"eval_logits/rejected": -1.9887516498565674,
"eval_logps/chosen": -265.97637939453125,
"eval_logps/rejected": -232.0824737548828,
"eval_loss": 0.5326370596885681,
"eval_rewards/accuracies": 0.7294999957084656,
"eval_rewards/chosen": -0.14086098968982697,
"eval_rewards/margins": 0.7020561099052429,
"eval_rewards/rejected": -0.8429170250892639,
"eval_runtime": 1167.6557,
"eval_samples_per_second": 1.713,
"eval_steps_per_second": 0.428,
"step": 1936
},
{
"epoch": 2.0,
"learning_rate": 1.8446230386528893e-07,
"logits/chosen": -2.411083698272705,
"logits/rejected": -2.3344886302948,
"logps/chosen": -260.4184265136719,
"logps/rejected": -229.34713745117188,
"loss": 0.5528,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.18735817074775696,
"rewards/margins": 0.6163454055786133,
"rewards/rejected": -0.8037036061286926,
"step": 1940
},
{
"epoch": 2.01,
"learning_rate": 1.82548794489093e-07,
"logits/chosen": -2.391366481781006,
"logits/rejected": -2.3589439392089844,
"logps/chosen": -272.74444580078125,
"logps/rejected": -228.60281372070312,
"loss": 0.5464,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.15350715816020966,
"rewards/margins": 0.6585405468940735,
"rewards/rejected": -0.8120476603507996,
"step": 1950
},
{
"epoch": 2.02,
"learning_rate": 1.8063528511289706e-07,
"logits/chosen": -2.397200107574463,
"logits/rejected": -2.3327198028564453,
"logps/chosen": -258.4478759765625,
"logps/rejected": -224.2578582763672,
"loss": 0.5434,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.20096781849861145,
"rewards/margins": 0.657593846321106,
"rewards/rejected": -0.8585616946220398,
"step": 1960
},
{
"epoch": 2.03,
"learning_rate": 1.7872177573670112e-07,
"logits/chosen": -2.400557279586792,
"logits/rejected": -2.35810923576355,
"logps/chosen": -275.8924865722656,
"logps/rejected": -239.3294219970703,
"loss": 0.5145,
"rewards/accuracies": 0.7578125,
"rewards/chosen": -0.11401806026697159,
"rewards/margins": 0.747878909111023,
"rewards/rejected": -0.8618971109390259,
"step": 1970
},
{
"epoch": 2.04,
"learning_rate": 1.7680826636050515e-07,
"logits/chosen": -2.369227647781372,
"logits/rejected": -2.3667426109313965,
"logps/chosen": -257.553955078125,
"logps/rejected": -230.169677734375,
"loss": 0.5367,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.15207555890083313,
"rewards/margins": 0.6957732439041138,
"rewards/rejected": -0.8478488922119141,
"step": 1980
},
{
"epoch": 2.06,
"learning_rate": 1.7489475698430921e-07,
"logits/chosen": -2.372884511947632,
"logits/rejected": -2.3310484886169434,
"logps/chosen": -282.4217224121094,
"logps/rejected": -233.046875,
"loss": 0.5341,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.12857168912887573,
"rewards/margins": 0.752483606338501,
"rewards/rejected": -0.8810552358627319,
"step": 1990
},
{
"epoch": 2.07,
"learning_rate": 1.7298124760811328e-07,
"logits/chosen": -2.370082378387451,
"logits/rejected": -2.3288538455963135,
"logps/chosen": -253.7472686767578,
"logps/rejected": -234.3776092529297,
"loss": 0.5121,
"rewards/accuracies": 0.760937511920929,
"rewards/chosen": -0.08212677389383316,
"rewards/margins": 0.7719866633415222,
"rewards/rejected": -0.8541134595870972,
"step": 2000
},
{
"epoch": 2.08,
"learning_rate": 1.7106773823191734e-07,
"logits/chosen": -2.378678798675537,
"logits/rejected": -2.3208470344543457,
"logps/chosen": -267.8801574707031,
"logps/rejected": -231.2415771484375,
"loss": 0.573,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.16784097254276276,
"rewards/margins": 0.585302472114563,
"rewards/rejected": -0.7531434893608093,
"step": 2010
},
{
"epoch": 2.09,
"learning_rate": 1.691542288557214e-07,
"logits/chosen": -2.3666415214538574,
"logits/rejected": -2.316760540008545,
"logps/chosen": -260.22723388671875,
"logps/rejected": -225.22976684570312,
"loss": 0.5166,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.12360270321369171,
"rewards/margins": 0.7310017347335815,
"rewards/rejected": -0.8546044230461121,
"step": 2020
},
{
"epoch": 2.1,
"learning_rate": 1.6724071947952544e-07,
"logits/chosen": -2.3446133136749268,
"logits/rejected": -2.2931389808654785,
"logps/chosen": -266.8133239746094,
"logps/rejected": -237.9119415283203,
"loss": 0.5278,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.11298196017742157,
"rewards/margins": 0.7080703973770142,
"rewards/rejected": -0.8210523724555969,
"step": 2030
},
{
"epoch": 2.11,
"learning_rate": 1.653272101033295e-07,
"logits/chosen": -2.408759832382202,
"logits/rejected": -2.363680362701416,
"logps/chosen": -262.7159118652344,
"logps/rejected": -228.66390991210938,
"loss": 0.5595,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.14217299222946167,
"rewards/margins": 0.6589146852493286,
"rewards/rejected": -0.8010876774787903,
"step": 2040
},
{
"epoch": 2.12,
"learning_rate": 1.6341370072713356e-07,
"logits/chosen": -2.408491849899292,
"logits/rejected": -2.3210110664367676,
"logps/chosen": -246.5405731201172,
"logps/rejected": -223.0271453857422,
"loss": 0.5248,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.12794676423072815,
"rewards/margins": 0.7168751955032349,
"rewards/rejected": -0.8448219299316406,
"step": 2050
},
{
"epoch": 2.13,
"learning_rate": 1.6150019135093762e-07,
"logits/chosen": -2.3539464473724365,
"logits/rejected": -2.3444278240203857,
"logps/chosen": -278.1259765625,
"logps/rejected": -243.2495880126953,
"loss": 0.5504,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.12467856705188751,
"rewards/margins": 0.669019341468811,
"rewards/rejected": -0.7936979532241821,
"step": 2060
},
{
"epoch": 2.14,
"learning_rate": 1.5958668197474169e-07,
"logits/chosen": -2.3732540607452393,
"logits/rejected": -2.3456478118896484,
"logps/chosen": -286.5888366699219,
"logps/rejected": -231.79165649414062,
"loss": 0.5346,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.09783172607421875,
"rewards/margins": 0.705902099609375,
"rewards/rejected": -0.8037338256835938,
"step": 2070
},
{
"epoch": 2.15,
"learning_rate": 1.5767317259854572e-07,
"logits/chosen": -2.4102118015289307,
"logits/rejected": -2.3785674571990967,
"logps/chosen": -252.31881713867188,
"logps/rejected": -230.2682342529297,
"loss": 0.554,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.15130464732646942,
"rewards/margins": 0.668793797492981,
"rewards/rejected": -0.8200985193252563,
"step": 2080
},
{
"epoch": 2.16,
"learning_rate": 1.5575966322234978e-07,
"logits/chosen": -2.3902785778045654,
"logits/rejected": -2.361997127532959,
"logps/chosen": -277.2994384765625,
"logps/rejected": -236.0117645263672,
"loss": 0.5371,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.10212769359350204,
"rewards/margins": 0.7493409514427185,
"rewards/rejected": -0.8514686822891235,
"step": 2090
},
{
"epoch": 2.17,
"learning_rate": 1.5384615384615385e-07,
"logits/chosen": -2.371175765991211,
"logits/rejected": -2.340148687362671,
"logps/chosen": -283.32452392578125,
"logps/rejected": -234.09335327148438,
"loss": 0.5364,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.14591889083385468,
"rewards/margins": 0.7080722451210022,
"rewards/rejected": -0.8539912104606628,
"step": 2100
},
{
"epoch": 2.18,
"learning_rate": 1.519326444699579e-07,
"logits/chosen": -2.3838436603546143,
"logits/rejected": -2.368041515350342,
"logps/chosen": -277.5657653808594,
"logps/rejected": -240.91006469726562,
"loss": 0.5296,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.11766266822814941,
"rewards/margins": 0.6887077689170837,
"rewards/rejected": -0.8063703775405884,
"step": 2110
},
{
"epoch": 2.19,
"learning_rate": 1.5001913509376197e-07,
"logits/chosen": -2.4153029918670654,
"logits/rejected": -2.3472938537597656,
"logps/chosen": -260.43841552734375,
"logps/rejected": -222.5975799560547,
"loss": 0.5246,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.14667481184005737,
"rewards/margins": 0.7188171148300171,
"rewards/rejected": -0.8654918670654297,
"step": 2120
},
{
"epoch": 2.2,
"learning_rate": 1.4810562571756603e-07,
"logits/chosen": -2.3996524810791016,
"logits/rejected": -2.36572003364563,
"logps/chosen": -283.7561340332031,
"logps/rejected": -229.9889373779297,
"loss": 0.5135,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.12039141356945038,
"rewards/margins": 0.7521576285362244,
"rewards/rejected": -0.8725490570068359,
"step": 2130
},
{
"epoch": 2.21,
"learning_rate": 1.4619211634137007e-07,
"logits/chosen": -2.396955966949463,
"logits/rejected": -2.325171709060669,
"logps/chosen": -268.83880615234375,
"logps/rejected": -232.42672729492188,
"loss": 0.4993,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.10067176818847656,
"rewards/margins": 0.7941768765449524,
"rewards/rejected": -0.894848644733429,
"step": 2140
},
{
"epoch": 2.22,
"learning_rate": 1.4427860696517413e-07,
"logits/chosen": -2.360407590866089,
"logits/rejected": -2.3728294372558594,
"logps/chosen": -262.50665283203125,
"logps/rejected": -244.90261840820312,
"loss": 0.5567,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.15976184606552124,
"rewards/margins": 0.6574433445930481,
"rewards/rejected": -0.8172051310539246,
"step": 2150
},
{
"epoch": 2.23,
"learning_rate": 1.423650975889782e-07,
"logits/chosen": -2.3352928161621094,
"logits/rejected": -2.318737745285034,
"logps/chosen": -271.6351623535156,
"logps/rejected": -232.324951171875,
"loss": 0.5505,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.1446889042854309,
"rewards/margins": 0.6412814259529114,
"rewards/rejected": -0.7859703302383423,
"step": 2160
},
{
"epoch": 2.24,
"learning_rate": 1.4045158821278225e-07,
"logits/chosen": -2.396017074584961,
"logits/rejected": -2.3626606464385986,
"logps/chosen": -280.76287841796875,
"logps/rejected": -232.1551513671875,
"loss": 0.5443,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": -0.1374007910490036,
"rewards/margins": 0.6923818588256836,
"rewards/rejected": -0.8297826647758484,
"step": 2170
},
{
"epoch": 2.25,
"learning_rate": 1.3853807883658632e-07,
"logits/chosen": -2.3831605911254883,
"logits/rejected": -2.367901086807251,
"logps/chosen": -274.9002990722656,
"logps/rejected": -244.9043426513672,
"loss": 0.4998,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.06897449493408203,
"rewards/margins": 0.7879935503005981,
"rewards/rejected": -0.8569680452346802,
"step": 2180
},
{
"epoch": 2.26,
"learning_rate": 1.3662456946039035e-07,
"logits/chosen": -2.3475286960601807,
"logits/rejected": -2.3350141048431396,
"logps/chosen": -258.20428466796875,
"logps/rejected": -228.5579071044922,
"loss": 0.55,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.14758525788784027,
"rewards/margins": 0.6847792267799377,
"rewards/rejected": -0.8323644399642944,
"step": 2190
},
{
"epoch": 2.27,
"learning_rate": 1.3471106008419441e-07,
"logits/chosen": -2.416398763656616,
"logits/rejected": -2.3340847492218018,
"logps/chosen": -263.5863952636719,
"logps/rejected": -223.93826293945312,
"loss": 0.54,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.1121305376291275,
"rewards/margins": 0.6978212594985962,
"rewards/rejected": -0.8099517822265625,
"step": 2200
},
{
"epoch": 2.28,
"learning_rate": 1.3279755070799848e-07,
"logits/chosen": -2.3754360675811768,
"logits/rejected": -2.3295979499816895,
"logps/chosen": -261.3006896972656,
"logps/rejected": -228.99472045898438,
"loss": 0.5379,
"rewards/accuracies": 0.7578125,
"rewards/chosen": -0.12667515873908997,
"rewards/margins": 0.695867657661438,
"rewards/rejected": -0.8225427865982056,
"step": 2210
},
{
"epoch": 2.29,
"learning_rate": 1.3088404133180254e-07,
"logits/chosen": -2.373387575149536,
"logits/rejected": -2.3520331382751465,
"logps/chosen": -273.1501770019531,
"logps/rejected": -241.6131591796875,
"loss": 0.5105,
"rewards/accuracies": 0.770312488079071,
"rewards/chosen": -0.08919095993041992,
"rewards/margins": 0.7751600742340088,
"rewards/rejected": -0.8643510937690735,
"step": 2220
},
{
"epoch": 2.3,
"learning_rate": 1.289705319556066e-07,
"logits/chosen": -2.4029157161712646,
"logits/rejected": -2.3423054218292236,
"logps/chosen": -269.0888671875,
"logps/rejected": -238.63894653320312,
"loss": 0.5064,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.11739423125982285,
"rewards/margins": 0.8117318153381348,
"rewards/rejected": -0.9291261434555054,
"step": 2230
},
{
"epoch": 2.31,
"learning_rate": 1.2705702257941064e-07,
"logits/chosen": -2.3870110511779785,
"logits/rejected": -2.3228111267089844,
"logps/chosen": -247.74105834960938,
"logps/rejected": -220.97531127929688,
"loss": 0.5221,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.1162148267030716,
"rewards/margins": 0.7085736393928528,
"rewards/rejected": -0.8247883915901184,
"step": 2240
},
{
"epoch": 2.32,
"learning_rate": 1.251435132032147e-07,
"logits/chosen": -2.408937931060791,
"logits/rejected": -2.3306527137756348,
"logps/chosen": -299.36395263671875,
"logps/rejected": -241.8893585205078,
"loss": 0.543,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.11270508915185928,
"rewards/margins": 0.6921781897544861,
"rewards/rejected": -0.8048831820487976,
"step": 2250
},
{
"epoch": 2.33,
"learning_rate": 1.2323000382701873e-07,
"logits/chosen": -2.385676383972168,
"logits/rejected": -2.3467276096343994,
"logps/chosen": -281.59686279296875,
"logps/rejected": -241.01278686523438,
"loss": 0.5353,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1093897670507431,
"rewards/margins": 0.7332038879394531,
"rewards/rejected": -0.842593789100647,
"step": 2260
},
{
"epoch": 2.34,
"learning_rate": 1.213164944508228e-07,
"logits/chosen": -2.373408794403076,
"logits/rejected": -2.319791316986084,
"logps/chosen": -261.96563720703125,
"logps/rejected": -234.7034149169922,
"loss": 0.5529,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.11686080694198608,
"rewards/margins": 0.6834132671356201,
"rewards/rejected": -0.8002740740776062,
"step": 2270
},
{
"epoch": 2.35,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": -2.4085376262664795,
"logits/rejected": -2.3651652336120605,
"logps/chosen": -270.48358154296875,
"logps/rejected": -242.1610565185547,
"loss": 0.5305,
"rewards/accuracies": 0.770312488079071,
"rewards/chosen": -0.1516662836074829,
"rewards/margins": 0.7256360650062561,
"rewards/rejected": -0.877302348613739,
"step": 2280
},
{
"epoch": 2.37,
"learning_rate": 1.1748947569843092e-07,
"logits/chosen": -2.3058078289031982,
"logits/rejected": -2.2898011207580566,
"logps/chosen": -253.01205444335938,
"logps/rejected": -220.3304901123047,
"loss": 0.5347,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.13418254256248474,
"rewards/margins": 0.7016364336013794,
"rewards/rejected": -0.835818886756897,
"step": 2290
},
{
"epoch": 2.38,
"learning_rate": 1.1557596632223497e-07,
"logits/chosen": -2.386352062225342,
"logits/rejected": -2.3113696575164795,
"logps/chosen": -269.7099304199219,
"logps/rejected": -221.75302124023438,
"loss": 0.5503,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.17483191192150116,
"rewards/margins": 0.7187283635139465,
"rewards/rejected": -0.8935602903366089,
"step": 2300
},
{
"epoch": 2.39,
"learning_rate": 1.1366245694603903e-07,
"logits/chosen": -2.3636221885681152,
"logits/rejected": -2.342933177947998,
"logps/chosen": -258.5984802246094,
"logps/rejected": -218.21240234375,
"loss": 0.5484,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1345369666814804,
"rewards/margins": 0.6814537048339844,
"rewards/rejected": -0.8159906268119812,
"step": 2310
},
{
"epoch": 2.4,
"learning_rate": 1.1174894756984308e-07,
"logits/chosen": -2.370859384536743,
"logits/rejected": -2.3134427070617676,
"logps/chosen": -252.6942596435547,
"logps/rejected": -211.6784210205078,
"loss": 0.5317,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.1507762372493744,
"rewards/margins": 0.69977205991745,
"rewards/rejected": -0.8505484461784363,
"step": 2320
},
{
"epoch": 2.41,
"learning_rate": 1.0983543819364714e-07,
"logits/chosen": -2.4157214164733887,
"logits/rejected": -2.365856885910034,
"logps/chosen": -278.7106628417969,
"logps/rejected": -237.4716796875,
"loss": 0.538,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.13206318020820618,
"rewards/margins": 0.7201939821243286,
"rewards/rejected": -0.8522570729255676,
"step": 2330
},
{
"epoch": 2.42,
"learning_rate": 1.079219288174512e-07,
"logits/chosen": -2.3600049018859863,
"logits/rejected": -2.306662082672119,
"logps/chosen": -261.60443115234375,
"logps/rejected": -243.1952362060547,
"loss": 0.5475,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.16204313933849335,
"rewards/margins": 0.6782156825065613,
"rewards/rejected": -0.8402588963508606,
"step": 2340
},
{
"epoch": 2.43,
"learning_rate": 1.0600841944125525e-07,
"logits/chosen": -2.4186596870422363,
"logits/rejected": -2.345165729522705,
"logps/chosen": -258.1711730957031,
"logps/rejected": -228.2469024658203,
"loss": 0.5108,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.13342900574207306,
"rewards/margins": 0.770829439163208,
"rewards/rejected": -0.9042585492134094,
"step": 2350
},
{
"epoch": 2.44,
"learning_rate": 1.0409491006505931e-07,
"logits/chosen": -2.3762617111206055,
"logits/rejected": -2.3277175426483154,
"logps/chosen": -266.79815673828125,
"logps/rejected": -228.57821655273438,
"loss": 0.5197,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.16075488924980164,
"rewards/margins": 0.7380831837654114,
"rewards/rejected": -0.8988380432128906,
"step": 2360
},
{
"epoch": 2.45,
"learning_rate": 1.0218140068886336e-07,
"logits/chosen": -2.3823940753936768,
"logits/rejected": -2.307152271270752,
"logps/chosen": -267.8171691894531,
"logps/rejected": -229.06973266601562,
"loss": 0.5275,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.13006095588207245,
"rewards/margins": 0.7317984700202942,
"rewards/rejected": -0.8618593215942383,
"step": 2370
},
{
"epoch": 2.46,
"learning_rate": 1.0026789131266743e-07,
"logits/chosen": -2.389812469482422,
"logits/rejected": -2.358701229095459,
"logps/chosen": -259.939453125,
"logps/rejected": -227.9673309326172,
"loss": 0.5258,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.14463508129119873,
"rewards/margins": 0.6937167644500732,
"rewards/rejected": -0.8383519053459167,
"step": 2380
},
{
"epoch": 2.47,
"learning_rate": 9.835438193647149e-08,
"logits/chosen": -2.3749680519104004,
"logits/rejected": -2.325307846069336,
"logps/chosen": -261.16265869140625,
"logps/rejected": -235.45510864257812,
"loss": 0.5166,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.10085193812847137,
"rewards/margins": 0.7839605212211609,
"rewards/rejected": -0.8848124742507935,
"step": 2390
},
{
"epoch": 2.48,
"learning_rate": 9.644087256027554e-08,
"logits/chosen": -2.399411201477051,
"logits/rejected": -2.3411877155303955,
"logps/chosen": -270.646728515625,
"logps/rejected": -242.7877655029297,
"loss": 0.5583,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.1165170818567276,
"rewards/margins": 0.7015627026557922,
"rewards/rejected": -0.8180797696113586,
"step": 2400
},
{
"epoch": 2.49,
"learning_rate": 9.45273631840796e-08,
"logits/chosen": -2.378415584564209,
"logits/rejected": -2.3074827194213867,
"logps/chosen": -256.1658630371094,
"logps/rejected": -215.31173706054688,
"loss": 0.5382,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.16168564558029175,
"rewards/margins": 0.6914165616035461,
"rewards/rejected": -0.8531022071838379,
"step": 2410
},
{
"epoch": 2.5,
"learning_rate": 9.261385380788366e-08,
"logits/chosen": -2.4177417755126953,
"logits/rejected": -2.3251852989196777,
"logps/chosen": -267.59588623046875,
"logps/rejected": -226.686279296875,
"loss": 0.5423,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.16102801263332367,
"rewards/margins": 0.7026554942131042,
"rewards/rejected": -0.8636835813522339,
"step": 2420
},
{
"epoch": 2.51,
"learning_rate": 9.070034443168771e-08,
"logits/chosen": -2.3578057289123535,
"logits/rejected": -2.3223681449890137,
"logps/chosen": -263.61029052734375,
"logps/rejected": -225.79733276367188,
"loss": 0.5397,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.19550970196723938,
"rewards/margins": 0.7167800664901733,
"rewards/rejected": -0.9122897386550903,
"step": 2430
},
{
"epoch": 2.52,
"learning_rate": 8.878683505549177e-08,
"logits/chosen": -2.3730854988098145,
"logits/rejected": -2.358013153076172,
"logps/chosen": -274.9963073730469,
"logps/rejected": -241.24533081054688,
"loss": 0.5493,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.1562972366809845,
"rewards/margins": 0.6839101314544678,
"rewards/rejected": -0.8402072787284851,
"step": 2440
},
{
"epoch": 2.53,
"learning_rate": 8.687332567929582e-08,
"logits/chosen": -2.353519916534424,
"logits/rejected": -2.3354268074035645,
"logps/chosen": -267.2091064453125,
"logps/rejected": -218.87997436523438,
"loss": 0.5189,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.12245980650186539,
"rewards/margins": 0.7322528958320618,
"rewards/rejected": -0.8547126650810242,
"step": 2450
},
{
"epoch": 2.54,
"learning_rate": 8.495981630309988e-08,
"logits/chosen": -2.3733015060424805,
"logits/rejected": -2.310149669647217,
"logps/chosen": -262.130126953125,
"logps/rejected": -235.95108032226562,
"loss": 0.5279,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1203254908323288,
"rewards/margins": 0.7111250162124634,
"rewards/rejected": -0.8314505815505981,
"step": 2460
},
{
"epoch": 2.55,
"learning_rate": 8.304630692690395e-08,
"logits/chosen": -2.3978214263916016,
"logits/rejected": -2.358588218688965,
"logps/chosen": -283.1036682128906,
"logps/rejected": -232.8982391357422,
"loss": 0.5155,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.09982401877641678,
"rewards/margins": 0.7901795506477356,
"rewards/rejected": -0.8900035619735718,
"step": 2470
},
{
"epoch": 2.56,
"learning_rate": 8.1132797550708e-08,
"logits/chosen": -2.3774914741516113,
"logits/rejected": -2.3199660778045654,
"logps/chosen": -270.4402160644531,
"logps/rejected": -229.8076934814453,
"loss": 0.5217,
"rewards/accuracies": 0.7578125,
"rewards/chosen": -0.1376962959766388,
"rewards/margins": 0.7224219441413879,
"rewards/rejected": -0.8601182699203491,
"step": 2480
},
{
"epoch": 2.57,
"learning_rate": 7.921928817451206e-08,
"logits/chosen": -2.3702144622802734,
"logits/rejected": -2.3372480869293213,
"logps/chosen": -272.0224609375,
"logps/rejected": -220.9506072998047,
"loss": 0.4807,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.062098145484924316,
"rewards/margins": 0.82757568359375,
"rewards/rejected": -0.8896737098693848,
"step": 2490
},
{
"epoch": 2.58,
"learning_rate": 7.73057787983161e-08,
"logits/chosen": -2.3614370822906494,
"logits/rejected": -2.3565754890441895,
"logps/chosen": -270.15325927734375,
"logps/rejected": -231.0701141357422,
"loss": 0.5093,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.09890525788068771,
"rewards/margins": 0.8027753829956055,
"rewards/rejected": -0.901680588722229,
"step": 2500
},
{
"epoch": 2.59,
"learning_rate": 7.539226942212017e-08,
"logits/chosen": -2.379781484603882,
"logits/rejected": -2.3308448791503906,
"logps/chosen": -271.2726135253906,
"logps/rejected": -247.5769805908203,
"loss": 0.5444,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.13810952007770538,
"rewards/margins": 0.7006896138191223,
"rewards/rejected": -0.8387991189956665,
"step": 2510
},
{
"epoch": 2.6,
"learning_rate": 7.347876004592423e-08,
"logits/chosen": -2.4164352416992188,
"logits/rejected": -2.363954782485962,
"logps/chosen": -271.45989990234375,
"logps/rejected": -234.0578155517578,
"loss": 0.526,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.17485982179641724,
"rewards/margins": 0.7395257949829102,
"rewards/rejected": -0.9143856167793274,
"step": 2520
},
{
"epoch": 2.61,
"learning_rate": 7.156525066972828e-08,
"logits/chosen": -2.429539918899536,
"logits/rejected": -2.355285882949829,
"logps/chosen": -284.6403503417969,
"logps/rejected": -238.6908721923828,
"loss": 0.5199,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.11224106699228287,
"rewards/margins": 0.7983044385910034,
"rewards/rejected": -0.9105455279350281,
"step": 2530
},
{
"epoch": 2.62,
"learning_rate": 6.965174129353234e-08,
"logits/chosen": -2.368342876434326,
"logits/rejected": -2.3081254959106445,
"logps/chosen": -260.9881286621094,
"logps/rejected": -239.78683471679688,
"loss": 0.5422,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.15342268347740173,
"rewards/margins": 0.718788743019104,
"rewards/rejected": -0.8722113370895386,
"step": 2540
},
{
"epoch": 2.63,
"learning_rate": 6.773823191733639e-08,
"logits/chosen": -2.3808670043945312,
"logits/rejected": -2.32783842086792,
"logps/chosen": -272.7002868652344,
"logps/rejected": -221.1647186279297,
"loss": 0.5194,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.1014653667807579,
"rewards/margins": 0.7284099459648132,
"rewards/rejected": -0.8298752903938293,
"step": 2550
},
{
"epoch": 2.64,
"learning_rate": 6.582472254114045e-08,
"logits/chosen": -2.453993320465088,
"logits/rejected": -2.3969106674194336,
"logps/chosen": -275.2949523925781,
"logps/rejected": -238.8881378173828,
"loss": 0.5372,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.09556527435779572,
"rewards/margins": 0.740602433681488,
"rewards/rejected": -0.8361676931381226,
"step": 2560
},
{
"epoch": 2.65,
"learning_rate": 6.391121316494451e-08,
"logits/chosen": -2.3907103538513184,
"logits/rejected": -2.350787878036499,
"logps/chosen": -250.9322967529297,
"logps/rejected": -234.1465606689453,
"loss": 0.5312,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.15345308184623718,
"rewards/margins": 0.7323213815689087,
"rewards/rejected": -0.8857744336128235,
"step": 2570
},
{
"epoch": 2.66,
"learning_rate": 6.199770378874856e-08,
"logits/chosen": -2.387080669403076,
"logits/rejected": -2.35870623588562,
"logps/chosen": -269.1571350097656,
"logps/rejected": -229.3518524169922,
"loss": 0.5205,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.12354423105716705,
"rewards/margins": 0.7709532380104065,
"rewards/rejected": -0.8944975137710571,
"step": 2580
},
{
"epoch": 2.68,
"learning_rate": 6.008419441255262e-08,
"logits/chosen": -2.398855209350586,
"logits/rejected": -2.381904125213623,
"logps/chosen": -263.2884521484375,
"logps/rejected": -231.7559051513672,
"loss": 0.5259,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.05873938649892807,
"rewards/margins": 0.7153197526931763,
"rewards/rejected": -0.7740591168403625,
"step": 2590
},
{
"epoch": 2.69,
"learning_rate": 5.817068503635668e-08,
"logits/chosen": -2.376080274581909,
"logits/rejected": -2.316380739212036,
"logps/chosen": -281.10455322265625,
"logps/rejected": -218.64511108398438,
"loss": 0.5151,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.09104409068822861,
"rewards/margins": 0.7734732627868652,
"rewards/rejected": -0.864517331123352,
"step": 2600
},
{
"epoch": 2.7,
"learning_rate": 5.6257175660160735e-08,
"logits/chosen": -2.380017042160034,
"logits/rejected": -2.3436522483825684,
"logps/chosen": -273.26165771484375,
"logps/rejected": -228.38821411132812,
"loss": 0.5224,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.13183800876140594,
"rewards/margins": 0.7871755957603455,
"rewards/rejected": -0.9190136194229126,
"step": 2610
},
{
"epoch": 2.71,
"learning_rate": 5.4343666283964784e-08,
"logits/chosen": -2.355607509613037,
"logits/rejected": -2.3353257179260254,
"logps/chosen": -262.3599548339844,
"logps/rejected": -226.27297973632812,
"loss": 0.5438,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.16399501264095306,
"rewards/margins": 0.6985915899276733,
"rewards/rejected": -0.8625866174697876,
"step": 2620
},
{
"epoch": 2.72,
"learning_rate": 5.243015690776884e-08,
"logits/chosen": -2.3956310749053955,
"logits/rejected": -2.3475804328918457,
"logps/chosen": -264.69793701171875,
"logps/rejected": -217.0175323486328,
"loss": 0.5229,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.12037453800439835,
"rewards/margins": 0.7271707653999329,
"rewards/rejected": -0.8475452661514282,
"step": 2630
},
{
"epoch": 2.73,
"learning_rate": 5.05166475315729e-08,
"logits/chosen": -2.327115297317505,
"logits/rejected": -2.3179469108581543,
"logps/chosen": -250.42251586914062,
"logps/rejected": -225.97705078125,
"loss": 0.5338,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.1434091329574585,
"rewards/margins": 0.6914544701576233,
"rewards/rejected": -0.8348636627197266,
"step": 2640
},
{
"epoch": 2.74,
"learning_rate": 4.860313815537696e-08,
"logits/chosen": -2.4228968620300293,
"logits/rejected": -2.358617067337036,
"logps/chosen": -266.973388671875,
"logps/rejected": -219.0054168701172,
"loss": 0.5307,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.08964172005653381,
"rewards/margins": 0.7375173568725586,
"rewards/rejected": -0.8271591067314148,
"step": 2650
},
{
"epoch": 2.75,
"learning_rate": 4.668962877918101e-08,
"logits/chosen": -2.3782241344451904,
"logits/rejected": -2.3420677185058594,
"logps/chosen": -271.78472900390625,
"logps/rejected": -224.3458251953125,
"loss": 0.4925,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.07608253508806229,
"rewards/margins": 0.8127967715263367,
"rewards/rejected": -0.8888792991638184,
"step": 2660
},
{
"epoch": 2.76,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": -2.362567901611328,
"logits/rejected": -2.3487753868103027,
"logps/chosen": -260.4725646972656,
"logps/rejected": -230.2348175048828,
"loss": 0.504,
"rewards/accuracies": 0.754687488079071,
"rewards/chosen": -0.11674080789089203,
"rewards/margins": 0.8006342649459839,
"rewards/rejected": -0.9173750877380371,
"step": 2670
},
{
"epoch": 2.77,
"learning_rate": 4.2862610026789124e-08,
"logits/chosen": -2.368887186050415,
"logits/rejected": -2.3095037937164307,
"logps/chosen": -267.6027526855469,
"logps/rejected": -227.1664276123047,
"loss": 0.5355,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.12747621536254883,
"rewards/margins": 0.7349743843078613,
"rewards/rejected": -0.8624505996704102,
"step": 2680
},
{
"epoch": 2.78,
"learning_rate": 4.0949100650593186e-08,
"logits/chosen": -2.4295105934143066,
"logits/rejected": -2.3712687492370605,
"logps/chosen": -271.1334228515625,
"logps/rejected": -226.98959350585938,
"loss": 0.5366,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.14556117355823517,
"rewards/margins": 0.7037054896354675,
"rewards/rejected": -0.8492666482925415,
"step": 2690
},
{
"epoch": 2.79,
"learning_rate": 3.903559127439724e-08,
"logits/chosen": -2.404041290283203,
"logits/rejected": -2.3408515453338623,
"logps/chosen": -271.45184326171875,
"logps/rejected": -231.26318359375,
"loss": 0.5223,
"rewards/accuracies": 0.754687488079071,
"rewards/chosen": -0.15005668997764587,
"rewards/margins": 0.7375911474227905,
"rewards/rejected": -0.887647807598114,
"step": 2700
},
{
"epoch": 2.8,
"learning_rate": 3.71220818982013e-08,
"logits/chosen": -2.4113287925720215,
"logits/rejected": -2.363337993621826,
"logps/chosen": -279.56695556640625,
"logps/rejected": -228.7524871826172,
"loss": 0.5678,
"rewards/accuracies": 0.6890624761581421,
"rewards/chosen": -0.18398186564445496,
"rewards/margins": 0.6596510410308838,
"rewards/rejected": -0.8436328768730164,
"step": 2710
},
{
"epoch": 2.81,
"learning_rate": 3.520857252200535e-08,
"logits/chosen": -2.4288249015808105,
"logits/rejected": -2.3564791679382324,
"logps/chosen": -271.6515808105469,
"logps/rejected": -229.5021514892578,
"loss": 0.5407,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.18123161792755127,
"rewards/margins": 0.7020525932312012,
"rewards/rejected": -0.8832842111587524,
"step": 2720
},
{
"epoch": 2.82,
"learning_rate": 3.3295063145809414e-08,
"logits/chosen": -2.3590943813323975,
"logits/rejected": -2.322199583053589,
"logps/chosen": -273.1612854003906,
"logps/rejected": -253.64633178710938,
"loss": 0.5437,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.17031243443489075,
"rewards/margins": 0.713148295879364,
"rewards/rejected": -0.8834608197212219,
"step": 2730
},
{
"epoch": 2.83,
"learning_rate": 3.138155376961347e-08,
"logits/chosen": -2.3528659343719482,
"logits/rejected": -2.3328776359558105,
"logps/chosen": -256.59613037109375,
"logps/rejected": -226.8491973876953,
"loss": 0.5234,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.12790945172309875,
"rewards/margins": 0.7292603254318237,
"rewards/rejected": -0.8571697473526001,
"step": 2740
},
{
"epoch": 2.84,
"learning_rate": 2.9468044393417525e-08,
"logits/chosen": -2.332599639892578,
"logits/rejected": -2.328411340713501,
"logps/chosen": -260.6733093261719,
"logps/rejected": -226.01119995117188,
"loss": 0.5406,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.1711007058620453,
"rewards/margins": 0.6720742583274841,
"rewards/rejected": -0.8431750535964966,
"step": 2750
},
{
"epoch": 2.85,
"learning_rate": 2.755453501722158e-08,
"logits/chosen": -2.3848772048950195,
"logits/rejected": -2.346205949783325,
"logps/chosen": -268.3501281738281,
"logps/rejected": -224.84347534179688,
"loss": 0.5294,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.13995392620563507,
"rewards/margins": 0.7762855887413025,
"rewards/rejected": -0.9162395596504211,
"step": 2760
},
{
"epoch": 2.86,
"learning_rate": 2.564102564102564e-08,
"logits/chosen": -2.38297438621521,
"logits/rejected": -2.3261475563049316,
"logps/chosen": -265.07781982421875,
"logps/rejected": -244.471923828125,
"loss": 0.5524,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.13383716344833374,
"rewards/margins": 0.6434152722358704,
"rewards/rejected": -0.7772524952888489,
"step": 2770
},
{
"epoch": 2.87,
"learning_rate": 2.3727516264829695e-08,
"logits/chosen": -2.3448472023010254,
"logits/rejected": -2.3202641010284424,
"logps/chosen": -266.0987854003906,
"logps/rejected": -228.6033172607422,
"loss": 0.5201,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.1439387947320938,
"rewards/margins": 0.7299402952194214,
"rewards/rejected": -0.873879075050354,
"step": 2780
},
{
"epoch": 2.88,
"learning_rate": 2.1814006888633754e-08,
"logits/chosen": -2.355379104614258,
"logits/rejected": -2.3448832035064697,
"logps/chosen": -268.690185546875,
"logps/rejected": -234.4865264892578,
"loss": 0.5581,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.17621631920337677,
"rewards/margins": 0.6596941351890564,
"rewards/rejected": -0.835910439491272,
"step": 2790
},
{
"epoch": 2.89,
"learning_rate": 1.990049751243781e-08,
"logits/chosen": -2.355900287628174,
"logits/rejected": -2.32261061668396,
"logps/chosen": -264.06536865234375,
"logps/rejected": -232.2172088623047,
"loss": 0.5227,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.11503295600414276,
"rewards/margins": 0.7390708327293396,
"rewards/rejected": -0.8541038632392883,
"step": 2800
},
{
"epoch": 2.9,
"learning_rate": 1.7986988136241865e-08,
"logits/chosen": -2.380585193634033,
"logits/rejected": -2.325172185897827,
"logps/chosen": -271.6562805175781,
"logps/rejected": -234.0508575439453,
"loss": 0.5377,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.16687723994255066,
"rewards/margins": 0.6961434483528137,
"rewards/rejected": -0.8630207180976868,
"step": 2810
},
{
"epoch": 2.91,
"learning_rate": 1.6073478760045924e-08,
"logits/chosen": -2.3646774291992188,
"logits/rejected": -2.3574256896972656,
"logps/chosen": -282.1201171875,
"logps/rejected": -234.2088165283203,
"loss": 0.5145,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.048953305929899216,
"rewards/margins": 0.788312554359436,
"rewards/rejected": -0.8372658491134644,
"step": 2820
},
{
"epoch": 2.92,
"learning_rate": 1.4159969383849981e-08,
"logits/chosen": -2.371241569519043,
"logits/rejected": -2.355045795440674,
"logps/chosen": -280.1076965332031,
"logps/rejected": -234.8966522216797,
"loss": 0.5564,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.15984012186527252,
"rewards/margins": 0.6876562833786011,
"rewards/rejected": -0.8474963903427124,
"step": 2830
},
{
"epoch": 2.93,
"learning_rate": 1.2246460007654037e-08,
"logits/chosen": -2.360264778137207,
"logits/rejected": -2.332968235015869,
"logps/chosen": -278.0101013183594,
"logps/rejected": -239.9487762451172,
"loss": 0.5575,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.13736246526241302,
"rewards/margins": 0.6766383051872253,
"rewards/rejected": -0.8140007853507996,
"step": 2840
},
{
"epoch": 2.94,
"learning_rate": 1.0332950631458094e-08,
"logits/chosen": -2.400036334991455,
"logits/rejected": -2.3746438026428223,
"logps/chosen": -267.7570495605469,
"logps/rejected": -229.16140747070312,
"loss": 0.5313,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.12042717635631561,
"rewards/margins": 0.7909914255142212,
"rewards/rejected": -0.9114185571670532,
"step": 2850
},
{
"epoch": 2.95,
"learning_rate": 8.419441255262151e-09,
"logits/chosen": -2.3523342609405518,
"logits/rejected": -2.3188953399658203,
"logps/chosen": -260.3684387207031,
"logps/rejected": -233.06326293945312,
"loss": 0.5271,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.15820932388305664,
"rewards/margins": 0.7365429997444153,
"rewards/rejected": -0.8947523236274719,
"step": 2860
},
{
"epoch": 2.96,
"learning_rate": 6.505931879066207e-09,
"logits/chosen": -2.3432793617248535,
"logits/rejected": -2.33192777633667,
"logps/chosen": -278.02117919921875,
"logps/rejected": -233.4646453857422,
"loss": 0.5247,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.08122755587100983,
"rewards/margins": 0.7956343293190002,
"rewards/rejected": -0.8768618702888489,
"step": 2870
},
{
"epoch": 2.97,
"learning_rate": 4.592422502870264e-09,
"logits/chosen": -2.4073646068573,
"logits/rejected": -2.375094175338745,
"logps/chosen": -280.04608154296875,
"logps/rejected": -233.2005615234375,
"loss": 0.5261,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.13482233881950378,
"rewards/margins": 0.7431889772415161,
"rewards/rejected": -0.8780113458633423,
"step": 2880
},
{
"epoch": 2.98,
"learning_rate": 2.6789131266743202e-09,
"logits/chosen": -2.374481439590454,
"logits/rejected": -2.320697784423828,
"logps/chosen": -255.5072784423828,
"logps/rejected": -207.7611083984375,
"loss": 0.5271,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.14782702922821045,
"rewards/margins": 0.7393444180488586,
"rewards/rejected": -0.8871713876724243,
"step": 2890
},
{
"epoch": 3.0,
"learning_rate": 7.654037504783773e-10,
"logits/chosen": -2.381277561187744,
"logits/rejected": -2.313739061355591,
"logps/chosen": -267.82568359375,
"logps/rejected": -234.2742156982422,
"loss": 0.5194,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.10935642570257187,
"rewards/margins": 0.7639234662055969,
"rewards/rejected": -0.873279869556427,
"step": 2900
},
{
"epoch": 3.0,
"eval_logits/chosen": -2.0344715118408203,
"eval_logits/rejected": -1.9804012775421143,
"eval_logps/chosen": -265.97662353515625,
"eval_logps/rejected": -232.47203063964844,
"eval_loss": 0.5272051095962524,
"eval_rewards/accuracies": 0.734000027179718,
"eval_rewards/chosen": -0.1408846527338028,
"eval_rewards/margins": 0.7409887909889221,
"eval_rewards/rejected": -0.8818734884262085,
"eval_runtime": 1090.2134,
"eval_samples_per_second": 1.835,
"eval_steps_per_second": 0.459,
"step": 2904
},
{
"epoch": 3.0,
"step": 2904,
"total_flos": 0.0,
"train_loss": 0.5639242924154626,
"train_runtime": 165279.5111,
"train_samples_per_second": 1.125,
"train_steps_per_second": 0.018
}
],
"logging_steps": 10,
"max_steps": 2904,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}