|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9993222089532967, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.447075843811035, |
|
"logits/rejected": -2.526996612548828, |
|
"logps/chosen": -235.39663696289062, |
|
"logps/rejected": -214.08815002441406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.487886667251587, |
|
"logits/rejected": -2.427130699157715, |
|
"logps/chosen": -280.10888671875, |
|
"logps/rejected": -230.16168212890625, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0025838064029812813, |
|
"rewards/margins": 0.0049818274565041065, |
|
"rewards/rejected": -0.0023980215191841125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.41877818107605, |
|
"logits/rejected": -2.356771230697632, |
|
"logps/chosen": -255.56265258789062, |
|
"logps/rejected": -226.37399291992188, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.001528903958387673, |
|
"rewards/margins": 0.0006666237604804337, |
|
"rewards/rejected": 0.0008622803725302219, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.42828369140625, |
|
"logits/rejected": -2.4059910774230957, |
|
"logps/chosen": -272.57012939453125, |
|
"logps/rejected": -227.35250854492188, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -0.001070805243216455, |
|
"rewards/margins": -0.0018140410538762808, |
|
"rewards/rejected": 0.000743235694244504, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.425325870513916, |
|
"logits/rejected": -2.374124050140381, |
|
"logps/chosen": -249.1795654296875, |
|
"logps/rejected": -220.6439971923828, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.0025015759747475386, |
|
"rewards/margins": 8.866000280249864e-05, |
|
"rewards/rejected": 0.0024129163939505816, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.4614310264587402, |
|
"logits/rejected": -2.416882038116455, |
|
"logps/chosen": -259.7109680175781, |
|
"logps/rejected": -220.2974090576172, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.0015415346715599298, |
|
"rewards/margins": 0.003707319498062134, |
|
"rewards/rejected": -0.0021657845936715603, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.462627649307251, |
|
"logits/rejected": -2.4049839973449707, |
|
"logps/chosen": -259.0118713378906, |
|
"logps/rejected": -228.43917846679688, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.002671582391485572, |
|
"rewards/margins": 0.0019277830142527819, |
|
"rewards/rejected": 0.0007437997264787555, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.4417717456817627, |
|
"logits/rejected": -2.4220786094665527, |
|
"logps/chosen": -267.39825439453125, |
|
"logps/rejected": -210.96157836914062, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.001063968287780881, |
|
"rewards/margins": 0.002977523719891906, |
|
"rewards/rejected": -0.001913555315695703, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.453876495361328, |
|
"logits/rejected": -2.3886351585388184, |
|
"logps/chosen": -280.5273132324219, |
|
"logps/rejected": -225.0200653076172, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.0006787125021219254, |
|
"rewards/margins": 0.0013104949612170458, |
|
"rewards/rejected": -0.0006317828083410859, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.4767956733703613, |
|
"logits/rejected": -2.3978798389434814, |
|
"logps/chosen": -271.4781799316406, |
|
"logps/rejected": -231.6018524169922, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.00017936174117494375, |
|
"rewards/margins": 0.0006834475207142532, |
|
"rewards/rejected": -0.0008628091891296208, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.4933345317840576, |
|
"logits/rejected": -2.397916555404663, |
|
"logps/chosen": -265.00872802734375, |
|
"logps/rejected": -215.407470703125, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0026833172887563705, |
|
"rewards/margins": 0.003812385257333517, |
|
"rewards/rejected": -0.0011290680849924684, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.4396605491638184, |
|
"logits/rejected": -2.366703748703003, |
|
"logps/chosen": -283.7935791015625, |
|
"logps/rejected": -214.5601806640625, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.0025542343501001596, |
|
"rewards/margins": 0.00826872419565916, |
|
"rewards/rejected": -0.005714490078389645, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.4569156169891357, |
|
"logits/rejected": -2.429029703140259, |
|
"logps/chosen": -271.7438049316406, |
|
"logps/rejected": -229.4224395751953, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004260816611349583, |
|
"rewards/margins": 0.010780954733490944, |
|
"rewards/rejected": -0.0065201385878026485, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.452051877975464, |
|
"logits/rejected": -2.3855373859405518, |
|
"logps/chosen": -267.55743408203125, |
|
"logps/rejected": -212.14273071289062, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0025894823484122753, |
|
"rewards/margins": 0.005025609862059355, |
|
"rewards/rejected": -0.0024361279793083668, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.4718971252441406, |
|
"logits/rejected": -2.417950391769409, |
|
"logps/chosen": -274.26593017578125, |
|
"logps/rejected": -212.1128692626953, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.0036728009581565857, |
|
"rewards/margins": 0.008318398147821426, |
|
"rewards/rejected": -0.004645597655326128, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.419431209564209, |
|
"logits/rejected": -2.3849945068359375, |
|
"logps/chosen": -250.10806274414062, |
|
"logps/rejected": -210.3776397705078, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.542187511920929, |
|
"rewards/chosen": 0.0029598295222967863, |
|
"rewards/margins": 0.007620878517627716, |
|
"rewards/rejected": -0.004661048296838999, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.4403343200683594, |
|
"logits/rejected": -2.378030776977539, |
|
"logps/chosen": -267.47332763671875, |
|
"logps/rejected": -218.4069061279297, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.0031638103537261486, |
|
"rewards/margins": 0.009145173244178295, |
|
"rewards/rejected": -0.0059813628904521465, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.4039931297302246, |
|
"logits/rejected": -2.3714652061462402, |
|
"logps/chosen": -277.943359375, |
|
"logps/rejected": -221.7199249267578, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.008871063590049744, |
|
"rewards/margins": 0.012961235828697681, |
|
"rewards/rejected": -0.004090171307325363, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.41255784034729, |
|
"logits/rejected": -2.382023572921753, |
|
"logps/chosen": -271.4554443359375, |
|
"logps/rejected": -226.9301300048828, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.005444863811135292, |
|
"rewards/margins": 0.01286339946091175, |
|
"rewards/rejected": -0.007418536581099033, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.4459285736083984, |
|
"logits/rejected": -2.394118547439575, |
|
"logps/chosen": -276.55389404296875, |
|
"logps/rejected": -222.62655639648438, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.01423065084964037, |
|
"rewards/margins": 0.02362729236483574, |
|
"rewards/rejected": -0.009396640583872795, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.4238436222076416, |
|
"logits/rejected": -2.393543243408203, |
|
"logps/chosen": -249.68899536132812, |
|
"logps/rejected": -214.36233520507812, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.014417588710784912, |
|
"rewards/margins": 0.0309614147990942, |
|
"rewards/rejected": -0.01654382422566414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.4502434730529785, |
|
"logits/rejected": -2.4075448513031006, |
|
"logps/chosen": -270.61175537109375, |
|
"logps/rejected": -235.2810516357422, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.01293298788368702, |
|
"rewards/margins": 0.025450533255934715, |
|
"rewards/rejected": -0.01251754630357027, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.414132595062256, |
|
"logits/rejected": -2.364130735397339, |
|
"logps/chosen": -263.3313903808594, |
|
"logps/rejected": -219.0230712890625, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.604687511920929, |
|
"rewards/chosen": 0.014896327629685402, |
|
"rewards/margins": 0.030013080686330795, |
|
"rewards/rejected": -0.015116755850613117, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.4107182025909424, |
|
"logits/rejected": -2.3757405281066895, |
|
"logps/chosen": -273.1572265625, |
|
"logps/rejected": -231.4423065185547, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.015099003911018372, |
|
"rewards/margins": 0.037129949778318405, |
|
"rewards/rejected": -0.022030945867300034, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.4387900829315186, |
|
"logits/rejected": -2.396888256072998, |
|
"logps/chosen": -271.6656799316406, |
|
"logps/rejected": -233.677734375, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.021000446751713753, |
|
"rewards/margins": 0.04467698931694031, |
|
"rewards/rejected": -0.023676546290516853, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.431246519088745, |
|
"logits/rejected": -2.461184501647949, |
|
"logps/chosen": -264.908447265625, |
|
"logps/rejected": -225.65451049804688, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": 0.022870570421218872, |
|
"rewards/margins": 0.05118563771247864, |
|
"rewards/rejected": -0.028315063565969467, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.409027576446533, |
|
"logits/rejected": -2.4082815647125244, |
|
"logps/chosen": -249.64242553710938, |
|
"logps/rejected": -204.5191650390625, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.676562488079071, |
|
"rewards/chosen": 0.02077900990843773, |
|
"rewards/margins": 0.05811852216720581, |
|
"rewards/rejected": -0.03733951598405838, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.4640724658966064, |
|
"logits/rejected": -2.438767910003662, |
|
"logps/chosen": -281.8011169433594, |
|
"logps/rejected": -224.46932983398438, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.025280708447098732, |
|
"rewards/margins": 0.06713660806417465, |
|
"rewards/rejected": -0.04185590520501137, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.441326141357422, |
|
"logits/rejected": -2.3782386779785156, |
|
"logps/chosen": -266.28228759765625, |
|
"logps/rejected": -217.6759796142578, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.02818796969950199, |
|
"rewards/margins": 0.08737680315971375, |
|
"rewards/rejected": -0.059188831597566605, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.4530272483825684, |
|
"logits/rejected": -2.4197421073913574, |
|
"logps/chosen": -251.4274444580078, |
|
"logps/rejected": -206.58395385742188, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.019450683146715164, |
|
"rewards/margins": 0.07725103944540024, |
|
"rewards/rejected": -0.05780036002397537, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -2.4357409477233887, |
|
"logits/rejected": -2.401296615600586, |
|
"logps/chosen": -258.9688415527344, |
|
"logps/rejected": -214.4955291748047, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.02522132731974125, |
|
"rewards/margins": 0.09243801981210709, |
|
"rewards/rejected": -0.06721669435501099, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -2.433469772338867, |
|
"logits/rejected": -2.397340774536133, |
|
"logps/chosen": -264.56365966796875, |
|
"logps/rejected": -223.6669464111328, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.027543241158127785, |
|
"rewards/margins": 0.10247315466403961, |
|
"rewards/rejected": -0.07492991536855698, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -2.4279608726501465, |
|
"logits/rejected": -2.383455514907837, |
|
"logps/chosen": -268.522216796875, |
|
"logps/rejected": -215.8023223876953, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": 0.032382432371377945, |
|
"rewards/margins": 0.11742101609706879, |
|
"rewards/rejected": -0.08503858745098114, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -2.483980178833008, |
|
"logits/rejected": -2.4091663360595703, |
|
"logps/chosen": -266.2663879394531, |
|
"logps/rejected": -230.7337188720703, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.03822886198759079, |
|
"rewards/margins": 0.12609949707984924, |
|
"rewards/rejected": -0.08787062764167786, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -2.437373161315918, |
|
"logits/rejected": -2.3692476749420166, |
|
"logps/chosen": -252.1580047607422, |
|
"logps/rejected": -221.46554565429688, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": 0.034671518951654434, |
|
"rewards/margins": 0.12736742198467255, |
|
"rewards/rejected": -0.09269589185714722, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -2.457171678543091, |
|
"logits/rejected": -2.3946237564086914, |
|
"logps/chosen": -263.380615234375, |
|
"logps/rejected": -218.726318359375, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.023257676512002945, |
|
"rewards/margins": 0.13810031116008759, |
|
"rewards/rejected": -0.11484263837337494, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -2.4557504653930664, |
|
"logits/rejected": -2.4013724327087402, |
|
"logps/chosen": -267.2643737792969, |
|
"logps/rejected": -222.85366821289062, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": 0.03796042129397392, |
|
"rewards/margins": 0.160946324467659, |
|
"rewards/rejected": -0.12298589944839478, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -2.4332971572875977, |
|
"logits/rejected": -2.421247959136963, |
|
"logps/chosen": -266.8581237792969, |
|
"logps/rejected": -235.67788696289062, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.6734374761581421, |
|
"rewards/chosen": 0.01746644265949726, |
|
"rewards/margins": 0.14841753244400024, |
|
"rewards/rejected": -0.13095109164714813, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -2.417196750640869, |
|
"logits/rejected": -2.37961483001709, |
|
"logps/chosen": -261.7236633300781, |
|
"logps/rejected": -229.08639526367188, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.651562511920929, |
|
"rewards/chosen": 0.01633612811565399, |
|
"rewards/margins": 0.1533532738685608, |
|
"rewards/rejected": -0.1370171457529068, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.4581520557403564, |
|
"logits/rejected": -2.3880105018615723, |
|
"logps/chosen": -263.3890686035156, |
|
"logps/rejected": -218.2093505859375, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.046173859387636185, |
|
"rewards/margins": 0.2001974880695343, |
|
"rewards/rejected": -0.15402361750602722, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -2.4509260654449463, |
|
"logits/rejected": -2.4113948345184326, |
|
"logps/chosen": -270.0736083984375, |
|
"logps/rejected": -221.9901123046875, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.04222818464040756, |
|
"rewards/margins": 0.1874697059392929, |
|
"rewards/rejected": -0.14524152874946594, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -2.4471030235290527, |
|
"logits/rejected": -2.4141643047332764, |
|
"logps/chosen": -261.27337646484375, |
|
"logps/rejected": -230.60299682617188, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.6546875238418579, |
|
"rewards/chosen": 0.021040040999650955, |
|
"rewards/margins": 0.18236112594604492, |
|
"rewards/rejected": -0.16132107377052307, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -2.460665464401245, |
|
"logits/rejected": -2.4335570335388184, |
|
"logps/chosen": -276.3302917480469, |
|
"logps/rejected": -226.70639038085938, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": 0.026431281119585037, |
|
"rewards/margins": 0.2242995798587799, |
|
"rewards/rejected": -0.19786831736564636, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -2.4207959175109863, |
|
"logits/rejected": -2.383884906768799, |
|
"logps/chosen": -250.6901397705078, |
|
"logps/rejected": -207.92062377929688, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.01974855735898018, |
|
"rewards/margins": 0.21391530334949493, |
|
"rewards/rejected": -0.19416674971580505, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -2.457104206085205, |
|
"logits/rejected": -2.3864612579345703, |
|
"logps/chosen": -274.47650146484375, |
|
"logps/rejected": -225.985107421875, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.04322098195552826, |
|
"rewards/margins": 0.24149248003959656, |
|
"rewards/rejected": -0.19827154278755188, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -2.4684674739837646, |
|
"logits/rejected": -2.432194948196411, |
|
"logps/chosen": -262.0184020996094, |
|
"logps/rejected": -226.8969268798828, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.6703125238418579, |
|
"rewards/chosen": 0.012870723381638527, |
|
"rewards/margins": 0.226064994931221, |
|
"rewards/rejected": -0.21319429576396942, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -2.4258971214294434, |
|
"logits/rejected": -2.3564021587371826, |
|
"logps/chosen": -256.30084228515625, |
|
"logps/rejected": -219.12112426757812, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.6734374761581421, |
|
"rewards/chosen": -0.004575688857585192, |
|
"rewards/margins": 0.21516656875610352, |
|
"rewards/rejected": -0.21974226832389832, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -2.4722161293029785, |
|
"logits/rejected": -2.4338574409484863, |
|
"logps/chosen": -279.41644287109375, |
|
"logps/rejected": -232.3635711669922, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.012542584910988808, |
|
"rewards/margins": 0.2474808394908905, |
|
"rewards/rejected": -0.23493823409080505, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -2.381640911102295, |
|
"logits/rejected": -2.4078078269958496, |
|
"logps/chosen": -262.5255126953125, |
|
"logps/rejected": -226.96853637695312, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": 0.03892933949828148, |
|
"rewards/margins": 0.2794772982597351, |
|
"rewards/rejected": -0.24054794013500214, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -2.436652421951294, |
|
"logits/rejected": -2.3565993309020996, |
|
"logps/chosen": -263.81829833984375, |
|
"logps/rejected": -223.61801147460938, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": 0.016417725011706352, |
|
"rewards/margins": 0.2946879267692566, |
|
"rewards/rejected": -0.2782701849937439, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -2.3938071727752686, |
|
"logits/rejected": -2.37441086769104, |
|
"logps/chosen": -273.1866760253906, |
|
"logps/rejected": -219.1422576904297, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": 0.022179026156663895, |
|
"rewards/margins": 0.27144354581832886, |
|
"rewards/rejected": -0.24926450848579407, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.438375473022461, |
|
"logits/rejected": -2.4063642024993896, |
|
"logps/chosen": -268.3760681152344, |
|
"logps/rejected": -213.6297607421875, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.025703424587845802, |
|
"rewards/margins": 0.3301311433315277, |
|
"rewards/rejected": -0.30442774295806885, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -2.4285857677459717, |
|
"logits/rejected": -2.3742969036102295, |
|
"logps/chosen": -270.7893371582031, |
|
"logps/rejected": -229.7726593017578, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.017922762781381607, |
|
"rewards/margins": 0.3229644298553467, |
|
"rewards/rejected": -0.305041640996933, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -2.4130568504333496, |
|
"logits/rejected": -2.3629188537597656, |
|
"logps/chosen": -272.3194885253906, |
|
"logps/rejected": -231.18997192382812, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.015474101528525352, |
|
"rewards/margins": 0.3130945861339569, |
|
"rewards/rejected": -0.2976204752922058, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -2.443058490753174, |
|
"logits/rejected": -2.3707220554351807, |
|
"logps/chosen": -265.5616760253906, |
|
"logps/rejected": -224.46688842773438, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.0283407811075449, |
|
"rewards/margins": 0.30165895819664, |
|
"rewards/rejected": -0.32999974489212036, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -2.459993362426758, |
|
"logits/rejected": -2.4190433025360107, |
|
"logps/chosen": -262.33197021484375, |
|
"logps/rejected": -231.3585662841797, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.00041465210961177945, |
|
"rewards/margins": 0.29224497079849243, |
|
"rewards/rejected": -0.29183030128479004, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -2.3841280937194824, |
|
"logits/rejected": -2.3862245082855225, |
|
"logps/chosen": -268.51177978515625, |
|
"logps/rejected": -231.3872833251953, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.008536433801054955, |
|
"rewards/margins": 0.32063713669776917, |
|
"rewards/rejected": -0.31210070848464966, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -2.4563305377960205, |
|
"logits/rejected": -2.423436403274536, |
|
"logps/chosen": -267.9896545410156, |
|
"logps/rejected": -222.6366729736328, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": 0.011319964192807674, |
|
"rewards/margins": 0.36020052433013916, |
|
"rewards/rejected": -0.3488805890083313, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -2.4537854194641113, |
|
"logits/rejected": -2.3811707496643066, |
|
"logps/chosen": -270.5040588378906, |
|
"logps/rejected": -231.43017578125, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.019544053822755814, |
|
"rewards/margins": 0.3704259693622589, |
|
"rewards/rejected": -0.3508819341659546, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -2.447350025177002, |
|
"logits/rejected": -2.388247013092041, |
|
"logps/chosen": -271.6213684082031, |
|
"logps/rejected": -223.79696655273438, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": 0.0032621710561215878, |
|
"rewards/margins": 0.367009699344635, |
|
"rewards/rejected": -0.3637475371360779, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -2.457573175430298, |
|
"logits/rejected": -2.429401397705078, |
|
"logps/chosen": -266.47222900390625, |
|
"logps/rejected": -231.684814453125, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.007931029424071312, |
|
"rewards/margins": 0.3559093475341797, |
|
"rewards/rejected": -0.36384040117263794, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -2.4467155933380127, |
|
"logits/rejected": -2.4398138523101807, |
|
"logps/chosen": -280.1789855957031, |
|
"logps/rejected": -237.6522216796875, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.0163104385137558, |
|
"rewards/margins": 0.3898230493068695, |
|
"rewards/rejected": -0.3735126256942749, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -2.4456491470336914, |
|
"logits/rejected": -2.397401809692383, |
|
"logps/chosen": -257.31146240234375, |
|
"logps/rejected": -213.8458709716797, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": 0.0058257849887013435, |
|
"rewards/margins": 0.402109295129776, |
|
"rewards/rejected": -0.39628344774246216, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.455310821533203, |
|
"logits/rejected": -2.4044442176818848, |
|
"logps/chosen": -269.50531005859375, |
|
"logps/rejected": -223.09915161132812, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.03774386644363403, |
|
"rewards/margins": 0.37117189168930054, |
|
"rewards/rejected": -0.40891575813293457, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -2.429537773132324, |
|
"logits/rejected": -2.4004569053649902, |
|
"logps/chosen": -278.3745422363281, |
|
"logps/rejected": -238.91348266601562, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.02088163048028946, |
|
"rewards/margins": 0.4280461370944977, |
|
"rewards/rejected": -0.4489278197288513, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -2.462010622024536, |
|
"logits/rejected": -2.382342576980591, |
|
"logps/chosen": -274.82489013671875, |
|
"logps/rejected": -228.21871948242188, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.027593884617090225, |
|
"rewards/margins": 0.4239775538444519, |
|
"rewards/rejected": -0.451571524143219, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -2.408452033996582, |
|
"logits/rejected": -2.367763042449951, |
|
"logps/chosen": -279.24713134765625, |
|
"logps/rejected": -234.92257690429688, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.029710102826356888, |
|
"rewards/margins": 0.3825686275959015, |
|
"rewards/rejected": -0.4122787117958069, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -2.400705575942993, |
|
"logits/rejected": -2.386396884918213, |
|
"logps/chosen": -265.80059814453125, |
|
"logps/rejected": -221.22183227539062, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.01276162825524807, |
|
"rewards/margins": 0.3836382031440735, |
|
"rewards/rejected": -0.3963998258113861, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -2.434727191925049, |
|
"logits/rejected": -2.3701629638671875, |
|
"logps/chosen": -265.0262145996094, |
|
"logps/rejected": -230.69918823242188, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.052033863961696625, |
|
"rewards/margins": 0.3843652307987213, |
|
"rewards/rejected": -0.43639907240867615, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -2.4166369438171387, |
|
"logits/rejected": -2.3753108978271484, |
|
"logps/chosen": -263.7073059082031, |
|
"logps/rejected": -228.69186401367188, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.031519632786512375, |
|
"rewards/margins": 0.4231399893760681, |
|
"rewards/rejected": -0.454659640789032, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -2.494065761566162, |
|
"logits/rejected": -2.3916873931884766, |
|
"logps/chosen": -277.77325439453125, |
|
"logps/rejected": -226.1985321044922, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.04932181164622307, |
|
"rewards/margins": 0.41205042600631714, |
|
"rewards/rejected": -0.4613722264766693, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -2.431324005126953, |
|
"logits/rejected": -2.4029393196105957, |
|
"logps/chosen": -280.7895812988281, |
|
"logps/rejected": -239.1829833984375, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.054767437279224396, |
|
"rewards/margins": 0.40931397676467896, |
|
"rewards/rejected": -0.46408137679100037, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -2.4472877979278564, |
|
"logits/rejected": -2.357172727584839, |
|
"logps/chosen": -252.1331329345703, |
|
"logps/rejected": -216.9487762451172, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": -0.049075834453105927, |
|
"rewards/margins": 0.3829793632030487, |
|
"rewards/rejected": -0.43205517530441284, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -2.3492183685302734, |
|
"logits/rejected": -2.34523606300354, |
|
"logps/chosen": -248.2432403564453, |
|
"logps/rejected": -214.99880981445312, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07765182107686996, |
|
"rewards/margins": 0.3819560408592224, |
|
"rewards/rejected": -0.45960789918899536, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -2.3994088172912598, |
|
"logits/rejected": -2.3783352375030518, |
|
"logps/chosen": -246.6106719970703, |
|
"logps/rejected": -206.70840454101562, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.04350767284631729, |
|
"rewards/margins": 0.439382404088974, |
|
"rewards/rejected": -0.4828900694847107, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -2.3943963050842285, |
|
"logits/rejected": -2.3858072757720947, |
|
"logps/chosen": -266.1705627441406, |
|
"logps/rejected": -225.35940551757812, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.010749602690339088, |
|
"rewards/margins": 0.48278599977493286, |
|
"rewards/rejected": -0.49353551864624023, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -2.470837354660034, |
|
"logits/rejected": -2.391634464263916, |
|
"logps/chosen": -269.9073791503906, |
|
"logps/rejected": -229.0169677734375, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05836876481771469, |
|
"rewards/margins": 0.463728666305542, |
|
"rewards/rejected": -0.5220974087715149, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -2.399672746658325, |
|
"logits/rejected": -2.386239528656006, |
|
"logps/chosen": -265.0301513671875, |
|
"logps/rejected": -216.77737426757812, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.05191594362258911, |
|
"rewards/margins": 0.4625419080257416, |
|
"rewards/rejected": -0.5144578218460083, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.4455151557922363, |
|
"logits/rejected": -2.3676414489746094, |
|
"logps/chosen": -263.79571533203125, |
|
"logps/rejected": -222.31787109375, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.07387879490852356, |
|
"rewards/margins": 0.4389980435371399, |
|
"rewards/rejected": -0.5128768086433411, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -2.4102261066436768, |
|
"logits/rejected": -2.353691577911377, |
|
"logps/chosen": -277.6340026855469, |
|
"logps/rejected": -241.7203826904297, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.08135993033647537, |
|
"rewards/margins": 0.44334641098976135, |
|
"rewards/rejected": -0.524706244468689, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -2.430101156234741, |
|
"logits/rejected": -2.385629177093506, |
|
"logps/chosen": -267.8277587890625, |
|
"logps/rejected": -237.6192169189453, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.09289533644914627, |
|
"rewards/margins": 0.47283419966697693, |
|
"rewards/rejected": -0.5657294988632202, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -2.424495220184326, |
|
"logits/rejected": -2.3845698833465576, |
|
"logps/chosen": -265.8463134765625, |
|
"logps/rejected": -226.7728729248047, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.05096619576215744, |
|
"rewards/margins": 0.43721461296081543, |
|
"rewards/rejected": -0.4881807863712311, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -2.4253883361816406, |
|
"logits/rejected": -2.3850014209747314, |
|
"logps/chosen": -272.5957946777344, |
|
"logps/rejected": -233.60498046875, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.06406211853027344, |
|
"rewards/margins": 0.48712214827537537, |
|
"rewards/rejected": -0.5511842370033264, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -2.4437859058380127, |
|
"logits/rejected": -2.3819785118103027, |
|
"logps/chosen": -264.5028381347656, |
|
"logps/rejected": -227.0218048095703, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.06855222582817078, |
|
"rewards/margins": 0.4592631459236145, |
|
"rewards/rejected": -0.5278154015541077, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -2.427250385284424, |
|
"logits/rejected": -2.3620200157165527, |
|
"logps/chosen": -254.1734161376953, |
|
"logps/rejected": -229.9873046875, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.0878441333770752, |
|
"rewards/margins": 0.43157902359962463, |
|
"rewards/rejected": -0.5194231271743774, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -2.4152088165283203, |
|
"logits/rejected": -2.399456024169922, |
|
"logps/chosen": -268.06689453125, |
|
"logps/rejected": -232.3248748779297, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.09431511908769608, |
|
"rewards/margins": 0.4891575872898102, |
|
"rewards/rejected": -0.5834725499153137, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -2.461259126663208, |
|
"logits/rejected": -2.4431066513061523, |
|
"logps/chosen": -260.641845703125, |
|
"logps/rejected": -233.93637084960938, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.07183202356100082, |
|
"rewards/margins": 0.44370943307876587, |
|
"rewards/rejected": -0.5155414342880249, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.423548698425293, |
|
"logits/rejected": -2.3617987632751465, |
|
"logps/chosen": -264.8348083496094, |
|
"logps/rejected": -234.61605834960938, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.08685998618602753, |
|
"rewards/margins": 0.47184914350509644, |
|
"rewards/rejected": -0.5587090849876404, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -2.4239916801452637, |
|
"logits/rejected": -2.3515267372131348, |
|
"logps/chosen": -258.3001403808594, |
|
"logps/rejected": -219.2425079345703, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.12072154134511948, |
|
"rewards/margins": 0.434969425201416, |
|
"rewards/rejected": -0.5556910037994385, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -2.4307010173797607, |
|
"logits/rejected": -2.3626708984375, |
|
"logps/chosen": -283.5355224609375, |
|
"logps/rejected": -235.6796417236328, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.06075868755578995, |
|
"rewards/margins": 0.5323190689086914, |
|
"rewards/rejected": -0.5930777788162231, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -2.445495128631592, |
|
"logits/rejected": -2.368015766143799, |
|
"logps/chosen": -273.6012878417969, |
|
"logps/rejected": -237.4881134033203, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -0.09077002108097076, |
|
"rewards/margins": 0.45550060272216797, |
|
"rewards/rejected": -0.5462706685066223, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -2.431802988052368, |
|
"logits/rejected": -2.3802406787872314, |
|
"logps/chosen": -268.13336181640625, |
|
"logps/rejected": -231.0006561279297, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.08810480684041977, |
|
"rewards/margins": 0.49154072999954224, |
|
"rewards/rejected": -0.579645574092865, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -2.4426496028900146, |
|
"logits/rejected": -2.385349750518799, |
|
"logps/chosen": -276.5380859375, |
|
"logps/rejected": -233.1389617919922, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.05111056566238403, |
|
"rewards/margins": 0.542784571647644, |
|
"rewards/rejected": -0.5938950777053833, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.383472204208374, |
|
"logits/rejected": -2.399059295654297, |
|
"logps/chosen": -272.26556396484375, |
|
"logps/rejected": -223.87905883789062, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.09871871769428253, |
|
"rewards/margins": 0.5212680101394653, |
|
"rewards/rejected": -0.6199867129325867, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -2.4192233085632324, |
|
"logits/rejected": -2.3954081535339355, |
|
"logps/chosen": -273.0626525878906, |
|
"logps/rejected": -239.1441192626953, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.10023512691259384, |
|
"rewards/margins": 0.5407330989837646, |
|
"rewards/rejected": -0.6409682035446167, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -2.406310558319092, |
|
"logits/rejected": -2.383169651031494, |
|
"logps/chosen": -268.0104064941406, |
|
"logps/rejected": -233.89749145507812, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": -0.09341312944889069, |
|
"rewards/margins": 0.4971606135368347, |
|
"rewards/rejected": -0.5905737280845642, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.4181647300720215, |
|
"logits/rejected": -2.3776590824127197, |
|
"logps/chosen": -284.4306640625, |
|
"logps/rejected": -238.550537109375, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.09716256707906723, |
|
"rewards/margins": 0.506054699420929, |
|
"rewards/rejected": -0.6032172441482544, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.079043388366699, |
|
"eval_logits/rejected": -2.0256688594818115, |
|
"eval_logps/chosen": -265.5612487792969, |
|
"eval_logps/rejected": -229.98611450195312, |
|
"eval_loss": 0.5545315742492676, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -0.09934788197278976, |
|
"eval_rewards/margins": 0.5339328050613403, |
|
"eval_rewards/rejected": -0.6332806348800659, |
|
"eval_runtime": 1088.7146, |
|
"eval_samples_per_second": 1.837, |
|
"eval_steps_per_second": 0.459, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -2.449903964996338, |
|
"logits/rejected": -2.3904850482940674, |
|
"logps/chosen": -269.0638732910156, |
|
"logps/rejected": -230.3978271484375, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.05560935288667679, |
|
"rewards/margins": 0.5521260499954224, |
|
"rewards/rejected": -0.6077354550361633, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -2.4261183738708496, |
|
"logits/rejected": -2.3550448417663574, |
|
"logps/chosen": -267.64080810546875, |
|
"logps/rejected": -227.04812622070312, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.09351503103971481, |
|
"rewards/margins": 0.5480056405067444, |
|
"rewards/rejected": -0.641520619392395, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -2.4298439025878906, |
|
"logits/rejected": -2.3898258209228516, |
|
"logps/chosen": -277.6336364746094, |
|
"logps/rejected": -225.4404296875, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.08874578773975372, |
|
"rewards/margins": 0.5600773096084595, |
|
"rewards/rejected": -0.6488231420516968, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -2.440823793411255, |
|
"logits/rejected": -2.3596456050872803, |
|
"logps/chosen": -280.1471862792969, |
|
"logps/rejected": -238.19503784179688, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09118635207414627, |
|
"rewards/margins": 0.556471049785614, |
|
"rewards/rejected": -0.6476574540138245, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -2.4096405506134033, |
|
"logits/rejected": -2.34090256690979, |
|
"logps/chosen": -257.96527099609375, |
|
"logps/rejected": -223.86474609375, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.13044361770153046, |
|
"rewards/margins": 0.49574214220046997, |
|
"rewards/rejected": -0.6261857151985168, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.436314105987549, |
|
"logits/rejected": -2.3611092567443848, |
|
"logps/chosen": -272.37335205078125, |
|
"logps/rejected": -231.5602264404297, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.07726888358592987, |
|
"rewards/margins": 0.5683926343917847, |
|
"rewards/rejected": -0.645661473274231, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -2.4350028038024902, |
|
"logits/rejected": -2.3586974143981934, |
|
"logps/chosen": -275.226806640625, |
|
"logps/rejected": -223.6283721923828, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.06087593361735344, |
|
"rewards/margins": 0.5964738130569458, |
|
"rewards/rejected": -0.6573497653007507, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -2.400864601135254, |
|
"logits/rejected": -2.3652467727661133, |
|
"logps/chosen": -255.86477661132812, |
|
"logps/rejected": -222.24752807617188, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.1341889202594757, |
|
"rewards/margins": 0.5123754739761353, |
|
"rewards/rejected": -0.6465644240379333, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.3898608684539795, |
|
"logits/rejected": -2.379241466522217, |
|
"logps/chosen": -261.6153869628906, |
|
"logps/rejected": -223.2140655517578, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.08567940443754196, |
|
"rewards/margins": 0.5808093547821045, |
|
"rewards/rejected": -0.666488766670227, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -2.4234771728515625, |
|
"logits/rejected": -2.4022397994995117, |
|
"logps/chosen": -280.1412048339844, |
|
"logps/rejected": -242.2364959716797, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.11185096204280853, |
|
"rewards/margins": 0.5072935223579407, |
|
"rewards/rejected": -0.6191444993019104, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -2.4101145267486572, |
|
"logits/rejected": -2.34965181350708, |
|
"logps/chosen": -268.9992370605469, |
|
"logps/rejected": -218.4785614013672, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.09902816265821457, |
|
"rewards/margins": 0.6141443252563477, |
|
"rewards/rejected": -0.7131724953651428, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -2.4058384895324707, |
|
"logits/rejected": -2.3813834190368652, |
|
"logps/chosen": -261.517333984375, |
|
"logps/rejected": -220.6446990966797, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.11156700551509857, |
|
"rewards/margins": 0.5487754940986633, |
|
"rewards/rejected": -0.6603423357009888, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -2.4069576263427734, |
|
"logits/rejected": -2.3752903938293457, |
|
"logps/chosen": -265.21124267578125, |
|
"logps/rejected": -223.32421875, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1168740764260292, |
|
"rewards/margins": 0.5669043064117432, |
|
"rewards/rejected": -0.6837784051895142, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -2.4091246128082275, |
|
"logits/rejected": -2.359158515930176, |
|
"logps/chosen": -261.7292175292969, |
|
"logps/rejected": -225.2208709716797, |
|
"loss": 0.5656, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.12864422798156738, |
|
"rewards/margins": 0.5372087955474854, |
|
"rewards/rejected": -0.6658530831336975, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -2.466919183731079, |
|
"logits/rejected": -2.3888449668884277, |
|
"logps/chosen": -277.5549011230469, |
|
"logps/rejected": -240.7705841064453, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.05221106857061386, |
|
"rewards/margins": 0.6037675738334656, |
|
"rewards/rejected": -0.6559786796569824, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -2.4204328060150146, |
|
"logits/rejected": -2.3684065341949463, |
|
"logps/chosen": -269.5735778808594, |
|
"logps/rejected": -219.9510498046875, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09449413418769836, |
|
"rewards/margins": 0.6241403818130493, |
|
"rewards/rejected": -0.7186344861984253, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -2.431792736053467, |
|
"logits/rejected": -2.3539392948150635, |
|
"logps/chosen": -279.9765930175781, |
|
"logps/rejected": -239.1553955078125, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.07704336196184158, |
|
"rewards/margins": 0.6232292652130127, |
|
"rewards/rejected": -0.7002726197242737, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -2.404470920562744, |
|
"logits/rejected": -2.3776755332946777, |
|
"logps/chosen": -256.79559326171875, |
|
"logps/rejected": -227.1933135986328, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.11403951793909073, |
|
"rewards/margins": 0.545345664024353, |
|
"rewards/rejected": -0.6593851447105408, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -2.357815980911255, |
|
"logits/rejected": -2.331373691558838, |
|
"logps/chosen": -253.86587524414062, |
|
"logps/rejected": -217.3060760498047, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.10227999836206436, |
|
"rewards/margins": 0.5686275362968445, |
|
"rewards/rejected": -0.6709075570106506, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -2.409895420074463, |
|
"logits/rejected": -2.3179931640625, |
|
"logps/chosen": -266.31640625, |
|
"logps/rejected": -218.92160034179688, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.10928237438201904, |
|
"rewards/margins": 0.6051470041275024, |
|
"rewards/rejected": -0.7144292593002319, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -2.4335556030273438, |
|
"logits/rejected": -2.3714287281036377, |
|
"logps/chosen": -277.588623046875, |
|
"logps/rejected": -233.22079467773438, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.14634881913661957, |
|
"rewards/margins": 0.5770747661590576, |
|
"rewards/rejected": -0.7234236001968384, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -2.4803996086120605, |
|
"logits/rejected": -2.409782886505127, |
|
"logps/chosen": -267.01678466796875, |
|
"logps/rejected": -247.248291015625, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1365794688463211, |
|
"rewards/margins": 0.5790367126464844, |
|
"rewards/rejected": -0.7156162261962891, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -2.3643290996551514, |
|
"logits/rejected": -2.3453285694122314, |
|
"logps/chosen": -260.19134521484375, |
|
"logps/rejected": -226.71481323242188, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": -0.1081305742263794, |
|
"rewards/margins": 0.571107029914856, |
|
"rewards/rejected": -0.6792376637458801, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.3916163444519043, |
|
"logits/rejected": -2.358982563018799, |
|
"logps/chosen": -260.97052001953125, |
|
"logps/rejected": -222.0037078857422, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10513798892498016, |
|
"rewards/margins": 0.6672986149787903, |
|
"rewards/rejected": -0.7724366188049316, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -2.3794853687286377, |
|
"logits/rejected": -2.3386852741241455, |
|
"logps/chosen": -257.59130859375, |
|
"logps/rejected": -222.3406219482422, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.13189749419689178, |
|
"rewards/margins": 0.5676501393318176, |
|
"rewards/rejected": -0.6995476484298706, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -2.384241819381714, |
|
"logits/rejected": -2.3438777923583984, |
|
"logps/chosen": -272.30767822265625, |
|
"logps/rejected": -231.92471313476562, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1082894578576088, |
|
"rewards/margins": 0.6236446499824524, |
|
"rewards/rejected": -0.731934130191803, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.4498887062072754, |
|
"logits/rejected": -2.382390260696411, |
|
"logps/chosen": -270.11798095703125, |
|
"logps/rejected": -228.3955841064453, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.13679789006710052, |
|
"rewards/margins": 0.6304437518119812, |
|
"rewards/rejected": -0.7672415971755981, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -2.3304078578948975, |
|
"logits/rejected": -2.328829288482666, |
|
"logps/chosen": -244.64791870117188, |
|
"logps/rejected": -224.4540252685547, |
|
"loss": 0.5672, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.1754181832075119, |
|
"rewards/margins": 0.5484617948532104, |
|
"rewards/rejected": -0.7238799333572388, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -2.4004368782043457, |
|
"logits/rejected": -2.394761562347412, |
|
"logps/chosen": -260.854248046875, |
|
"logps/rejected": -217.267333984375, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.09097670018672943, |
|
"rewards/margins": 0.6562029123306274, |
|
"rewards/rejected": -0.7471795678138733, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -2.420809268951416, |
|
"logits/rejected": -2.393630266189575, |
|
"logps/chosen": -268.07220458984375, |
|
"logps/rejected": -243.11996459960938, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12649384140968323, |
|
"rewards/margins": 0.5802772641181946, |
|
"rewards/rejected": -0.706771194934845, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -2.4304604530334473, |
|
"logits/rejected": -2.3626341819763184, |
|
"logps/chosen": -274.98638916015625, |
|
"logps/rejected": -246.15872192382812, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.07647743821144104, |
|
"rewards/margins": 0.5869981646537781, |
|
"rewards/rejected": -0.6634755730628967, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -2.392775774002075, |
|
"logits/rejected": -2.3546760082244873, |
|
"logps/chosen": -280.3741149902344, |
|
"logps/rejected": -248.8837127685547, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.0769033133983612, |
|
"rewards/margins": 0.693490743637085, |
|
"rewards/rejected": -0.7703940868377686, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.3941831588745117, |
|
"logits/rejected": -2.349119186401367, |
|
"logps/chosen": -275.7878112792969, |
|
"logps/rejected": -238.73684692382812, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.11901184171438217, |
|
"rewards/margins": 0.6922268867492676, |
|
"rewards/rejected": -0.8112386465072632, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -2.3558011054992676, |
|
"logits/rejected": -2.365652322769165, |
|
"logps/chosen": -254.7240447998047, |
|
"logps/rejected": -225.5684051513672, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.11073043197393417, |
|
"rewards/margins": 0.6586212515830994, |
|
"rewards/rejected": -0.7693516612052917, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -2.400010108947754, |
|
"logits/rejected": -2.3430371284484863, |
|
"logps/chosen": -282.26483154296875, |
|
"logps/rejected": -241.29495239257812, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.1404508799314499, |
|
"rewards/margins": 0.5930649042129517, |
|
"rewards/rejected": -0.7335157990455627, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -2.429117202758789, |
|
"logits/rejected": -2.380638360977173, |
|
"logps/chosen": -277.0819396972656, |
|
"logps/rejected": -231.4957733154297, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.10495986044406891, |
|
"rewards/margins": 0.6354261040687561, |
|
"rewards/rejected": -0.740385890007019, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -2.3560943603515625, |
|
"logits/rejected": -2.299285650253296, |
|
"logps/chosen": -283.2480773925781, |
|
"logps/rejected": -236.7747802734375, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.15119323134422302, |
|
"rewards/margins": 0.6373868584632874, |
|
"rewards/rejected": -0.788580060005188, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -2.4186995029449463, |
|
"logits/rejected": -2.403923511505127, |
|
"logps/chosen": -265.8408203125, |
|
"logps/rejected": -224.98312377929688, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1380973756313324, |
|
"rewards/margins": 0.6355406045913696, |
|
"rewards/rejected": -0.7736380100250244, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -2.3618245124816895, |
|
"logits/rejected": -2.340223550796509, |
|
"logps/chosen": -267.28338623046875, |
|
"logps/rejected": -229.21469116210938, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.126164972782135, |
|
"rewards/margins": 0.6094905138015747, |
|
"rewards/rejected": -0.7356554865837097, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -2.4243083000183105, |
|
"logits/rejected": -2.398084878921509, |
|
"logps/chosen": -256.0418395996094, |
|
"logps/rejected": -234.45346069335938, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.10923846065998077, |
|
"rewards/margins": 0.6375387907028198, |
|
"rewards/rejected": -0.7467772364616394, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -2.448951005935669, |
|
"logits/rejected": -2.3950791358947754, |
|
"logps/chosen": -276.74725341796875, |
|
"logps/rejected": -226.75149536132812, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.17273911833763123, |
|
"rewards/margins": 0.5847989916801453, |
|
"rewards/rejected": -0.7575381994247437, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.427473545074463, |
|
"logits/rejected": -2.3801541328430176, |
|
"logps/chosen": -267.68463134765625, |
|
"logps/rejected": -228.783447265625, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1168685331940651, |
|
"rewards/margins": 0.6201252937316895, |
|
"rewards/rejected": -0.7369938492774963, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -2.4376165866851807, |
|
"logits/rejected": -2.39223051071167, |
|
"logps/chosen": -269.39691162109375, |
|
"logps/rejected": -226.72702026367188, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.10355620086193085, |
|
"rewards/margins": 0.7153445482254028, |
|
"rewards/rejected": -0.8189007639884949, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -2.3847761154174805, |
|
"logits/rejected": -2.358484983444214, |
|
"logps/chosen": -265.6216125488281, |
|
"logps/rejected": -226.9099578857422, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": -0.1487416923046112, |
|
"rewards/margins": 0.582770049571991, |
|
"rewards/rejected": -0.7315118312835693, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.441329002380371, |
|
"logits/rejected": -2.405198335647583, |
|
"logps/chosen": -254.2424774169922, |
|
"logps/rejected": -216.15487670898438, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.10592655837535858, |
|
"rewards/margins": 0.6519125699996948, |
|
"rewards/rejected": -0.7578392624855042, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -2.353024482727051, |
|
"logits/rejected": -2.3756861686706543, |
|
"logps/chosen": -271.5851135253906, |
|
"logps/rejected": -226.3388214111328, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11950352042913437, |
|
"rewards/margins": 0.5914410948753357, |
|
"rewards/rejected": -0.710944652557373, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -2.4454894065856934, |
|
"logits/rejected": -2.4075827598571777, |
|
"logps/chosen": -278.7067565917969, |
|
"logps/rejected": -233.1806182861328, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.1553649604320526, |
|
"rewards/margins": 0.6472987532615662, |
|
"rewards/rejected": -0.8026638031005859, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -2.41646671295166, |
|
"logits/rejected": -2.380006790161133, |
|
"logps/chosen": -259.50830078125, |
|
"logps/rejected": -234.66000366210938, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.11458615958690643, |
|
"rewards/margins": 0.6320740580558777, |
|
"rewards/rejected": -0.7466602325439453, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -2.3705825805664062, |
|
"logits/rejected": -2.3389930725097656, |
|
"logps/chosen": -264.9784240722656, |
|
"logps/rejected": -235.85598754882812, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.11364629119634628, |
|
"rewards/margins": 0.6920466423034668, |
|
"rewards/rejected": -0.8056928515434265, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -2.3917994499206543, |
|
"logits/rejected": -2.361053705215454, |
|
"logps/chosen": -273.13323974609375, |
|
"logps/rejected": -237.2021026611328, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.1213529109954834, |
|
"rewards/margins": 0.6237030625343323, |
|
"rewards/rejected": -0.7450559735298157, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.3829543590545654, |
|
"logits/rejected": -2.3598859310150146, |
|
"logps/chosen": -256.2921447753906, |
|
"logps/rejected": -218.822998046875, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.09320759773254395, |
|
"rewards/margins": 0.628312349319458, |
|
"rewards/rejected": -0.721519947052002, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -2.3986709117889404, |
|
"logits/rejected": -2.3675730228424072, |
|
"logps/chosen": -261.54193115234375, |
|
"logps/rejected": -222.29812622070312, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.17618440091609955, |
|
"rewards/margins": 0.6171834468841553, |
|
"rewards/rejected": -0.793367862701416, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -2.403079032897949, |
|
"logits/rejected": -2.344881057739258, |
|
"logps/chosen": -271.6820068359375, |
|
"logps/rejected": -223.48422241210938, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.13215352594852448, |
|
"rewards/margins": 0.6836920976638794, |
|
"rewards/rejected": -0.8158456683158875, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -2.4172019958496094, |
|
"logits/rejected": -2.350555181503296, |
|
"logps/chosen": -261.85516357421875, |
|
"logps/rejected": -225.0038604736328, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.08901546150445938, |
|
"rewards/margins": 0.6642698049545288, |
|
"rewards/rejected": -0.75328528881073, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -2.4008395671844482, |
|
"logits/rejected": -2.351348876953125, |
|
"logps/chosen": -267.31951904296875, |
|
"logps/rejected": -227.8149871826172, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.13619837164878845, |
|
"rewards/margins": 0.6516298055648804, |
|
"rewards/rejected": -0.7878280878067017, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -2.348276138305664, |
|
"logits/rejected": -2.329331159591675, |
|
"logps/chosen": -263.51080322265625, |
|
"logps/rejected": -227.07809448242188, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.10638642311096191, |
|
"rewards/margins": 0.7151543498039246, |
|
"rewards/rejected": -0.8215408325195312, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -2.4366257190704346, |
|
"logits/rejected": -2.379861354827881, |
|
"logps/chosen": -279.295166015625, |
|
"logps/rejected": -239.08352661132812, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.15942886471748352, |
|
"rewards/margins": 0.6090508103370667, |
|
"rewards/rejected": -0.7684796452522278, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -2.364650249481201, |
|
"logits/rejected": -2.3203299045562744, |
|
"logps/chosen": -261.32708740234375, |
|
"logps/rejected": -223.8793487548828, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.13680413365364075, |
|
"rewards/margins": 0.6616954207420349, |
|
"rewards/rejected": -0.798499584197998, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -2.414820432662964, |
|
"logits/rejected": -2.3798413276672363, |
|
"logps/chosen": -281.36065673828125, |
|
"logps/rejected": -240.29238891601562, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.08666771650314331, |
|
"rewards/margins": 0.6870118975639343, |
|
"rewards/rejected": -0.7736796140670776, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -2.407886266708374, |
|
"logits/rejected": -2.3671507835388184, |
|
"logps/chosen": -281.9557189941406, |
|
"logps/rejected": -232.68588256835938, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.127783864736557, |
|
"rewards/margins": 0.7107834219932556, |
|
"rewards/rejected": -0.8385672569274902, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -2.37595796585083, |
|
"logits/rejected": -2.3402533531188965, |
|
"logps/chosen": -275.40106201171875, |
|
"logps/rejected": -241.32421875, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.13988900184631348, |
|
"rewards/margins": 0.6486446261405945, |
|
"rewards/rejected": -0.788533627986908, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -2.383958578109741, |
|
"logits/rejected": -2.3686203956604004, |
|
"logps/chosen": -273.81549072265625, |
|
"logps/rejected": -226.5820770263672, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.16685205698013306, |
|
"rewards/margins": 0.6781736016273499, |
|
"rewards/rejected": -0.8450256586074829, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.4135966300964355, |
|
"logits/rejected": -2.339186429977417, |
|
"logps/chosen": -261.7090759277344, |
|
"logps/rejected": -232.47018432617188, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.18446998298168182, |
|
"rewards/margins": 0.6322883367538452, |
|
"rewards/rejected": -0.8167583346366882, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -2.4073891639709473, |
|
"logits/rejected": -2.3973593711853027, |
|
"logps/chosen": -263.8055725097656, |
|
"logps/rejected": -233.00167846679688, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.12571503221988678, |
|
"rewards/margins": 0.6185272932052612, |
|
"rewards/rejected": -0.7442423701286316, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -2.3958241939544678, |
|
"logits/rejected": -2.356121063232422, |
|
"logps/chosen": -276.14556884765625, |
|
"logps/rejected": -239.56112670898438, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.11243724822998047, |
|
"rewards/margins": 0.6681596040725708, |
|
"rewards/rejected": -0.7805968523025513, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -2.387842893600464, |
|
"logits/rejected": -2.3812038898468018, |
|
"logps/chosen": -256.08905029296875, |
|
"logps/rejected": -216.9521484375, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.11949145793914795, |
|
"rewards/margins": 0.7178138494491577, |
|
"rewards/rejected": -0.8373053669929504, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -2.3569884300231934, |
|
"logits/rejected": -2.3548595905303955, |
|
"logps/chosen": -268.0955810546875, |
|
"logps/rejected": -234.91317749023438, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.1568536013364792, |
|
"rewards/margins": 0.656032145023346, |
|
"rewards/rejected": -0.8128856420516968, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -2.394106388092041, |
|
"logits/rejected": -2.338951587677002, |
|
"logps/chosen": -258.17071533203125, |
|
"logps/rejected": -227.0476531982422, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.18490514159202576, |
|
"rewards/margins": 0.674010157585144, |
|
"rewards/rejected": -0.8589152097702026, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.4031760692596436, |
|
"logits/rejected": -2.371420383453369, |
|
"logps/chosen": -252.31594848632812, |
|
"logps/rejected": -225.9946746826172, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11656501144170761, |
|
"rewards/margins": 0.6458471417427063, |
|
"rewards/rejected": -0.7624121308326721, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -2.3602213859558105, |
|
"logits/rejected": -2.3286445140838623, |
|
"logps/chosen": -269.58294677734375, |
|
"logps/rejected": -239.6148681640625, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.10988609492778778, |
|
"rewards/margins": 0.6352638006210327, |
|
"rewards/rejected": -0.7451499700546265, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -2.3772807121276855, |
|
"logits/rejected": -2.3392374515533447, |
|
"logps/chosen": -273.1993713378906, |
|
"logps/rejected": -226.94155883789062, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.15547646582126617, |
|
"rewards/margins": 0.6606963872909546, |
|
"rewards/rejected": -0.8161728978157043, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.339207172393799, |
|
"logits/rejected": -2.2881035804748535, |
|
"logps/chosen": -262.3006896972656, |
|
"logps/rejected": -222.82565307617188, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.1391618549823761, |
|
"rewards/margins": 0.6765463948249817, |
|
"rewards/rejected": -0.8157082796096802, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -2.36671781539917, |
|
"logits/rejected": -2.30442476272583, |
|
"logps/chosen": -262.6791076660156, |
|
"logps/rejected": -223.51834106445312, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.1453666090965271, |
|
"rewards/margins": 0.6910194754600525, |
|
"rewards/rejected": -0.8363860845565796, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -2.377718448638916, |
|
"logits/rejected": -2.3732407093048096, |
|
"logps/chosen": -264.3118591308594, |
|
"logps/rejected": -235.5894775390625, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.12053600698709488, |
|
"rewards/margins": 0.6903436183929443, |
|
"rewards/rejected": -0.8108797073364258, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.4043807983398438, |
|
"logits/rejected": -2.3661141395568848, |
|
"logps/chosen": -273.5931701660156, |
|
"logps/rejected": -232.48287963867188, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.140711709856987, |
|
"rewards/margins": 0.7188085317611694, |
|
"rewards/rejected": -0.8595201373100281, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -2.391242265701294, |
|
"logits/rejected": -2.33647084236145, |
|
"logps/chosen": -269.0169372558594, |
|
"logps/rejected": -230.73583984375, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.14084765315055847, |
|
"rewards/margins": 0.6776271462440491, |
|
"rewards/rejected": -0.8184748888015747, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -2.405012845993042, |
|
"logits/rejected": -2.3291537761688232, |
|
"logps/chosen": -279.62371826171875, |
|
"logps/rejected": -237.05722045898438, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.10567928850650787, |
|
"rewards/margins": 0.6449233293533325, |
|
"rewards/rejected": -0.750602662563324, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.3809354305267334, |
|
"logits/rejected": -2.341770648956299, |
|
"logps/chosen": -272.91741943359375, |
|
"logps/rejected": -228.8494873046875, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1115594357252121, |
|
"rewards/margins": 0.6728307604789734, |
|
"rewards/rejected": -0.7843901515007019, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -2.344855546951294, |
|
"logits/rejected": -2.347912549972534, |
|
"logps/chosen": -284.1566162109375, |
|
"logps/rejected": -242.9143524169922, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.12913444638252258, |
|
"rewards/margins": 0.6051042675971985, |
|
"rewards/rejected": -0.7342387437820435, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -2.3368725776672363, |
|
"logits/rejected": -2.3267197608947754, |
|
"logps/chosen": -279.5101623535156, |
|
"logps/rejected": -228.0315399169922, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.123216912150383, |
|
"rewards/margins": 0.6950885653495789, |
|
"rewards/rejected": -0.818305492401123, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.334354877471924, |
|
"logits/rejected": -2.3555445671081543, |
|
"logps/chosen": -272.8717956542969, |
|
"logps/rejected": -230.3594207763672, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.12218773365020752, |
|
"rewards/margins": 0.7177630662918091, |
|
"rewards/rejected": -0.8399508595466614, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -2.4097609519958496, |
|
"logits/rejected": -2.3510959148406982, |
|
"logps/chosen": -276.52862548828125, |
|
"logps/rejected": -218.99441528320312, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.06975328177213669, |
|
"rewards/margins": 0.6969150304794312, |
|
"rewards/rejected": -0.7666682600975037, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -2.3507437705993652, |
|
"logits/rejected": -2.3511948585510254, |
|
"logps/chosen": -271.771240234375, |
|
"logps/rejected": -231.90634155273438, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.12042073160409927, |
|
"rewards/margins": 0.7034494876861572, |
|
"rewards/rejected": -0.8238701820373535, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.4258570671081543, |
|
"logits/rejected": -2.4029757976531982, |
|
"logps/chosen": -264.3330078125, |
|
"logps/rejected": -227.8314208984375, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.10862596333026886, |
|
"rewards/margins": 0.6536161303520203, |
|
"rewards/rejected": -0.7622420787811279, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -2.4013142585754395, |
|
"logits/rejected": -2.34897518157959, |
|
"logps/chosen": -271.6585693359375, |
|
"logps/rejected": -241.2907257080078, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": -0.11902491748332977, |
|
"rewards/margins": 0.6104603409767151, |
|
"rewards/rejected": -0.7294851541519165, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -2.386214256286621, |
|
"logits/rejected": -2.33040452003479, |
|
"logps/chosen": -245.88143920898438, |
|
"logps/rejected": -216.9251251220703, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14682015776634216, |
|
"rewards/margins": 0.6507130861282349, |
|
"rewards/rejected": -0.7975332736968994, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.4217278957366943, |
|
"logits/rejected": -2.3312575817108154, |
|
"logps/chosen": -260.94085693359375, |
|
"logps/rejected": -222.13607788085938, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.11108909547328949, |
|
"rewards/margins": 0.6555716395378113, |
|
"rewards/rejected": -0.7666608095169067, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -2.406583309173584, |
|
"logits/rejected": -2.3424503803253174, |
|
"logps/chosen": -289.1400146484375, |
|
"logps/rejected": -241.73513793945312, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.7671874761581421, |
|
"rewards/chosen": -0.09672559797763824, |
|
"rewards/margins": 0.7783478498458862, |
|
"rewards/rejected": -0.8750733137130737, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -2.405856132507324, |
|
"logits/rejected": -2.350475311279297, |
|
"logps/chosen": -261.40814208984375, |
|
"logps/rejected": -229.8692169189453, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.15710802376270294, |
|
"rewards/margins": 0.6592746376991272, |
|
"rewards/rejected": -0.8163825869560242, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.3927724361419678, |
|
"logits/rejected": -2.332962989807129, |
|
"logps/chosen": -261.10699462890625, |
|
"logps/rejected": -237.5717010498047, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.1337103396654129, |
|
"rewards/margins": 0.6999514102935791, |
|
"rewards/rejected": -0.8336617350578308, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -2.4174818992614746, |
|
"logits/rejected": -2.361926317214966, |
|
"logps/chosen": -275.9540710449219, |
|
"logps/rejected": -229.05615234375, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1561015248298645, |
|
"rewards/margins": 0.7133805155754089, |
|
"rewards/rejected": -0.8694820404052734, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -2.373378276824951, |
|
"logits/rejected": -2.3580093383789062, |
|
"logps/chosen": -263.26739501953125, |
|
"logps/rejected": -229.62686157226562, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.15732435882091522, |
|
"rewards/margins": 0.6500160098075867, |
|
"rewards/rejected": -0.8073404431343079, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.3866982460021973, |
|
"logits/rejected": -2.3246593475341797, |
|
"logps/chosen": -261.379150390625, |
|
"logps/rejected": -227.70016479492188, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.117561474442482, |
|
"rewards/margins": 0.7048689723014832, |
|
"rewards/rejected": -0.8224304914474487, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -2.4001078605651855, |
|
"logits/rejected": -2.3805463314056396, |
|
"logps/chosen": -281.5653381347656, |
|
"logps/rejected": -243.9423828125, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.10007290542125702, |
|
"rewards/margins": 0.7120274305343628, |
|
"rewards/rejected": -0.8121002316474915, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -2.353015899658203, |
|
"logits/rejected": -2.3475286960601807, |
|
"logps/chosen": -268.6228942871094, |
|
"logps/rejected": -238.2252197265625, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11127477884292603, |
|
"rewards/margins": 0.7399830222129822, |
|
"rewards/rejected": -0.8512576818466187, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.3818321228027344, |
|
"logits/rejected": -2.3469431400299072, |
|
"logps/chosen": -265.0734558105469, |
|
"logps/rejected": -227.3889617919922, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.12172959744930267, |
|
"rewards/margins": 0.716955304145813, |
|
"rewards/rejected": -0.8386849164962769, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -2.401777744293213, |
|
"logits/rejected": -2.3709285259246826, |
|
"logps/chosen": -268.879638671875, |
|
"logps/rejected": -239.0655517578125, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": -0.1592234969139099, |
|
"rewards/margins": 0.6302945017814636, |
|
"rewards/rejected": -0.7895179986953735, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.042747735977173, |
|
"eval_logits/rejected": -1.9887516498565674, |
|
"eval_logps/chosen": -265.97637939453125, |
|
"eval_logps/rejected": -232.0824737548828, |
|
"eval_loss": 0.5326370596885681, |
|
"eval_rewards/accuracies": 0.7294999957084656, |
|
"eval_rewards/chosen": -0.14086098968982697, |
|
"eval_rewards/margins": 0.7020561099052429, |
|
"eval_rewards/rejected": -0.8429170250892639, |
|
"eval_runtime": 1167.6557, |
|
"eval_samples_per_second": 1.713, |
|
"eval_steps_per_second": 0.428, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -2.411083698272705, |
|
"logits/rejected": -2.3344886302948, |
|
"logps/chosen": -260.4184265136719, |
|
"logps/rejected": -229.34713745117188, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.18735817074775696, |
|
"rewards/margins": 0.6163454055786133, |
|
"rewards/rejected": -0.8037036061286926, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.391366481781006, |
|
"logits/rejected": -2.3589439392089844, |
|
"logps/chosen": -272.74444580078125, |
|
"logps/rejected": -228.60281372070312, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.15350715816020966, |
|
"rewards/margins": 0.6585405468940735, |
|
"rewards/rejected": -0.8120476603507996, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -2.397200107574463, |
|
"logits/rejected": -2.3327198028564453, |
|
"logps/chosen": -258.4478759765625, |
|
"logps/rejected": -224.2578582763672, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.20096781849861145, |
|
"rewards/margins": 0.657593846321106, |
|
"rewards/rejected": -0.8585616946220398, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -2.400557279586792, |
|
"logits/rejected": -2.35810923576355, |
|
"logps/chosen": -275.8924865722656, |
|
"logps/rejected": -239.3294219970703, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.11401806026697159, |
|
"rewards/margins": 0.747878909111023, |
|
"rewards/rejected": -0.8618971109390259, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.369227647781372, |
|
"logits/rejected": -2.3667426109313965, |
|
"logps/chosen": -257.553955078125, |
|
"logps/rejected": -230.169677734375, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.15207555890083313, |
|
"rewards/margins": 0.6957732439041138, |
|
"rewards/rejected": -0.8478488922119141, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -2.372884511947632, |
|
"logits/rejected": -2.3310484886169434, |
|
"logps/chosen": -282.4217224121094, |
|
"logps/rejected": -233.046875, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.12857168912887573, |
|
"rewards/margins": 0.752483606338501, |
|
"rewards/rejected": -0.8810552358627319, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -2.370082378387451, |
|
"logits/rejected": -2.3288538455963135, |
|
"logps/chosen": -253.7472686767578, |
|
"logps/rejected": -234.3776092529297, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.760937511920929, |
|
"rewards/chosen": -0.08212677389383316, |
|
"rewards/margins": 0.7719866633415222, |
|
"rewards/rejected": -0.8541134595870972, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.378678798675537, |
|
"logits/rejected": -2.3208470344543457, |
|
"logps/chosen": -267.8801574707031, |
|
"logps/rejected": -231.2415771484375, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.16784097254276276, |
|
"rewards/margins": 0.585302472114563, |
|
"rewards/rejected": -0.7531434893608093, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -2.3666415214538574, |
|
"logits/rejected": -2.316760540008545, |
|
"logps/chosen": -260.22723388671875, |
|
"logps/rejected": -225.22976684570312, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.12360270321369171, |
|
"rewards/margins": 0.7310017347335815, |
|
"rewards/rejected": -0.8546044230461121, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -2.3446133136749268, |
|
"logits/rejected": -2.2931389808654785, |
|
"logps/chosen": -266.8133239746094, |
|
"logps/rejected": -237.9119415283203, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11298196017742157, |
|
"rewards/margins": 0.7080703973770142, |
|
"rewards/rejected": -0.8210523724555969, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.408759832382202, |
|
"logits/rejected": -2.363680362701416, |
|
"logps/chosen": -262.7159118652344, |
|
"logps/rejected": -228.66390991210938, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.14217299222946167, |
|
"rewards/margins": 0.6589146852493286, |
|
"rewards/rejected": -0.8010876774787903, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -2.408491849899292, |
|
"logits/rejected": -2.3210110664367676, |
|
"logps/chosen": -246.5405731201172, |
|
"logps/rejected": -223.0271453857422, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.12794676423072815, |
|
"rewards/margins": 0.7168751955032349, |
|
"rewards/rejected": -0.8448219299316406, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -2.3539464473724365, |
|
"logits/rejected": -2.3444278240203857, |
|
"logps/chosen": -278.1259765625, |
|
"logps/rejected": -243.2495880126953, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.12467856705188751, |
|
"rewards/margins": 0.669019341468811, |
|
"rewards/rejected": -0.7936979532241821, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -2.3732540607452393, |
|
"logits/rejected": -2.3456478118896484, |
|
"logps/chosen": -286.5888366699219, |
|
"logps/rejected": -231.79165649414062, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.09783172607421875, |
|
"rewards/margins": 0.705902099609375, |
|
"rewards/rejected": -0.8037338256835938, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -2.4102118015289307, |
|
"logits/rejected": -2.3785674571990967, |
|
"logps/chosen": -252.31881713867188, |
|
"logps/rejected": -230.2682342529297, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.15130464732646942, |
|
"rewards/margins": 0.668793797492981, |
|
"rewards/rejected": -0.8200985193252563, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -2.3902785778045654, |
|
"logits/rejected": -2.361997127532959, |
|
"logps/chosen": -277.2994384765625, |
|
"logps/rejected": -236.0117645263672, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.10212769359350204, |
|
"rewards/margins": 0.7493409514427185, |
|
"rewards/rejected": -0.8514686822891235, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -2.371175765991211, |
|
"logits/rejected": -2.340148687362671, |
|
"logps/chosen": -283.32452392578125, |
|
"logps/rejected": -234.09335327148438, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.14591889083385468, |
|
"rewards/margins": 0.7080722451210022, |
|
"rewards/rejected": -0.8539912104606628, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -2.3838436603546143, |
|
"logits/rejected": -2.368041515350342, |
|
"logps/chosen": -277.5657653808594, |
|
"logps/rejected": -240.91006469726562, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.11766266822814941, |
|
"rewards/margins": 0.6887077689170837, |
|
"rewards/rejected": -0.8063703775405884, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -2.4153029918670654, |
|
"logits/rejected": -2.3472938537597656, |
|
"logps/chosen": -260.43841552734375, |
|
"logps/rejected": -222.5975799560547, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14667481184005737, |
|
"rewards/margins": 0.7188171148300171, |
|
"rewards/rejected": -0.8654918670654297, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -2.3996524810791016, |
|
"logits/rejected": -2.36572003364563, |
|
"logps/chosen": -283.7561340332031, |
|
"logps/rejected": -229.9889373779297, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.12039141356945038, |
|
"rewards/margins": 0.7521576285362244, |
|
"rewards/rejected": -0.8725490570068359, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -2.396955966949463, |
|
"logits/rejected": -2.325171709060669, |
|
"logps/chosen": -268.83880615234375, |
|
"logps/rejected": -232.42672729492188, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.10067176818847656, |
|
"rewards/margins": 0.7941768765449524, |
|
"rewards/rejected": -0.894848644733429, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -2.360407590866089, |
|
"logits/rejected": -2.3728294372558594, |
|
"logps/chosen": -262.50665283203125, |
|
"logps/rejected": -244.90261840820312, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.15976184606552124, |
|
"rewards/margins": 0.6574433445930481, |
|
"rewards/rejected": -0.8172051310539246, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -2.3352928161621094, |
|
"logits/rejected": -2.318737745285034, |
|
"logps/chosen": -271.6351623535156, |
|
"logps/rejected": -232.324951171875, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.1446889042854309, |
|
"rewards/margins": 0.6412814259529114, |
|
"rewards/rejected": -0.7859703302383423, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -2.396017074584961, |
|
"logits/rejected": -2.3626606464385986, |
|
"logps/chosen": -280.76287841796875, |
|
"logps/rejected": -232.1551513671875, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.1374007910490036, |
|
"rewards/margins": 0.6923818588256836, |
|
"rewards/rejected": -0.8297826647758484, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -2.3831605911254883, |
|
"logits/rejected": -2.367901086807251, |
|
"logps/chosen": -274.9002990722656, |
|
"logps/rejected": -244.9043426513672, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.06897449493408203, |
|
"rewards/margins": 0.7879935503005981, |
|
"rewards/rejected": -0.8569680452346802, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.3475286960601807, |
|
"logits/rejected": -2.3350141048431396, |
|
"logps/chosen": -258.20428466796875, |
|
"logps/rejected": -228.5579071044922, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.14758525788784027, |
|
"rewards/margins": 0.6847792267799377, |
|
"rewards/rejected": -0.8323644399642944, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -2.416398763656616, |
|
"logits/rejected": -2.3340847492218018, |
|
"logps/chosen": -263.5863952636719, |
|
"logps/rejected": -223.93826293945312, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.1121305376291275, |
|
"rewards/margins": 0.6978212594985962, |
|
"rewards/rejected": -0.8099517822265625, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -2.3754360675811768, |
|
"logits/rejected": -2.3295979499816895, |
|
"logps/chosen": -261.3006896972656, |
|
"logps/rejected": -228.99472045898438, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.12667515873908997, |
|
"rewards/margins": 0.695867657661438, |
|
"rewards/rejected": -0.8225427865982056, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.373387575149536, |
|
"logits/rejected": -2.3520331382751465, |
|
"logps/chosen": -273.1501770019531, |
|
"logps/rejected": -241.6131591796875, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -0.08919095993041992, |
|
"rewards/margins": 0.7751600742340088, |
|
"rewards/rejected": -0.8643510937690735, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -2.4029157161712646, |
|
"logits/rejected": -2.3423054218292236, |
|
"logps/chosen": -269.0888671875, |
|
"logps/rejected": -238.63894653320312, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.11739423125982285, |
|
"rewards/margins": 0.8117318153381348, |
|
"rewards/rejected": -0.9291261434555054, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -2.3870110511779785, |
|
"logits/rejected": -2.3228111267089844, |
|
"logps/chosen": -247.74105834960938, |
|
"logps/rejected": -220.97531127929688, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.1162148267030716, |
|
"rewards/margins": 0.7085736393928528, |
|
"rewards/rejected": -0.8247883915901184, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -2.408937931060791, |
|
"logits/rejected": -2.3306527137756348, |
|
"logps/chosen": -299.36395263671875, |
|
"logps/rejected": -241.8893585205078, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.11270508915185928, |
|
"rewards/margins": 0.6921781897544861, |
|
"rewards/rejected": -0.8048831820487976, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -2.385676383972168, |
|
"logits/rejected": -2.3467276096343994, |
|
"logps/chosen": -281.59686279296875, |
|
"logps/rejected": -241.01278686523438, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1093897670507431, |
|
"rewards/margins": 0.7332038879394531, |
|
"rewards/rejected": -0.842593789100647, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -2.373408794403076, |
|
"logits/rejected": -2.319791316986084, |
|
"logps/chosen": -261.96563720703125, |
|
"logps/rejected": -234.7034149169922, |
|
"loss": 0.5529, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.11686080694198608, |
|
"rewards/margins": 0.6834132671356201, |
|
"rewards/rejected": -0.8002740740776062, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.4085376262664795, |
|
"logits/rejected": -2.3651652336120605, |
|
"logps/chosen": -270.48358154296875, |
|
"logps/rejected": -242.1610565185547, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.770312488079071, |
|
"rewards/chosen": -0.1516662836074829, |
|
"rewards/margins": 0.7256360650062561, |
|
"rewards/rejected": -0.877302348613739, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -2.3058078289031982, |
|
"logits/rejected": -2.2898011207580566, |
|
"logps/chosen": -253.01205444335938, |
|
"logps/rejected": -220.3304901123047, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.13418254256248474, |
|
"rewards/margins": 0.7016364336013794, |
|
"rewards/rejected": -0.835818886756897, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -2.386352062225342, |
|
"logits/rejected": -2.3113696575164795, |
|
"logps/chosen": -269.7099304199219, |
|
"logps/rejected": -221.75302124023438, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.17483191192150116, |
|
"rewards/margins": 0.7187283635139465, |
|
"rewards/rejected": -0.8935602903366089, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.3636221885681152, |
|
"logits/rejected": -2.342933177947998, |
|
"logps/chosen": -258.5984802246094, |
|
"logps/rejected": -218.21240234375, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1345369666814804, |
|
"rewards/margins": 0.6814537048339844, |
|
"rewards/rejected": -0.8159906268119812, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -2.370859384536743, |
|
"logits/rejected": -2.3134427070617676, |
|
"logps/chosen": -252.6942596435547, |
|
"logps/rejected": -211.6784210205078, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.1507762372493744, |
|
"rewards/margins": 0.69977205991745, |
|
"rewards/rejected": -0.8505484461784363, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -2.4157214164733887, |
|
"logits/rejected": -2.365856885910034, |
|
"logps/chosen": -278.7106628417969, |
|
"logps/rejected": -237.4716796875, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.13206318020820618, |
|
"rewards/margins": 0.7201939821243286, |
|
"rewards/rejected": -0.8522570729255676, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.3600049018859863, |
|
"logits/rejected": -2.306662082672119, |
|
"logps/chosen": -261.60443115234375, |
|
"logps/rejected": -243.1952362060547, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.16204313933849335, |
|
"rewards/margins": 0.6782156825065613, |
|
"rewards/rejected": -0.8402588963508606, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -2.4186596870422363, |
|
"logits/rejected": -2.345165729522705, |
|
"logps/chosen": -258.1711730957031, |
|
"logps/rejected": -228.2469024658203, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.13342900574207306, |
|
"rewards/margins": 0.770829439163208, |
|
"rewards/rejected": -0.9042585492134094, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -2.3762617111206055, |
|
"logits/rejected": -2.3277175426483154, |
|
"logps/chosen": -266.79815673828125, |
|
"logps/rejected": -228.57821655273438, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.16075488924980164, |
|
"rewards/margins": 0.7380831837654114, |
|
"rewards/rejected": -0.8988380432128906, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.3823940753936768, |
|
"logits/rejected": -2.307152271270752, |
|
"logps/chosen": -267.8171691894531, |
|
"logps/rejected": -229.06973266601562, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.13006095588207245, |
|
"rewards/margins": 0.7317984700202942, |
|
"rewards/rejected": -0.8618593215942383, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -2.389812469482422, |
|
"logits/rejected": -2.358701229095459, |
|
"logps/chosen": -259.939453125, |
|
"logps/rejected": -227.9673309326172, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.14463508129119873, |
|
"rewards/margins": 0.6937167644500732, |
|
"rewards/rejected": -0.8383519053459167, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -2.3749680519104004, |
|
"logits/rejected": -2.325307846069336, |
|
"logps/chosen": -261.16265869140625, |
|
"logps/rejected": -235.45510864257812, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10085193812847137, |
|
"rewards/margins": 0.7839605212211609, |
|
"rewards/rejected": -0.8848124742507935, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.399411201477051, |
|
"logits/rejected": -2.3411877155303955, |
|
"logps/chosen": -270.646728515625, |
|
"logps/rejected": -242.7877655029297, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.1165170818567276, |
|
"rewards/margins": 0.7015627026557922, |
|
"rewards/rejected": -0.8180797696113586, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -2.378415584564209, |
|
"logits/rejected": -2.3074827194213867, |
|
"logps/chosen": -256.1658630371094, |
|
"logps/rejected": -215.31173706054688, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16168564558029175, |
|
"rewards/margins": 0.6914165616035461, |
|
"rewards/rejected": -0.8531022071838379, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -2.4177417755126953, |
|
"logits/rejected": -2.3251852989196777, |
|
"logps/chosen": -267.59588623046875, |
|
"logps/rejected": -226.686279296875, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16102801263332367, |
|
"rewards/margins": 0.7026554942131042, |
|
"rewards/rejected": -0.8636835813522339, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.3578057289123535, |
|
"logits/rejected": -2.3223681449890137, |
|
"logps/chosen": -263.61029052734375, |
|
"logps/rejected": -225.79733276367188, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.19550970196723938, |
|
"rewards/margins": 0.7167800664901733, |
|
"rewards/rejected": -0.9122897386550903, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -2.3730854988098145, |
|
"logits/rejected": -2.358013153076172, |
|
"logps/chosen": -274.9963073730469, |
|
"logps/rejected": -241.24533081054688, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1562972366809845, |
|
"rewards/margins": 0.6839101314544678, |
|
"rewards/rejected": -0.8402072787284851, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -2.353519916534424, |
|
"logits/rejected": -2.3354268074035645, |
|
"logps/chosen": -267.2091064453125, |
|
"logps/rejected": -218.87997436523438, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.12245980650186539, |
|
"rewards/margins": 0.7322528958320618, |
|
"rewards/rejected": -0.8547126650810242, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.3733015060424805, |
|
"logits/rejected": -2.310149669647217, |
|
"logps/chosen": -262.130126953125, |
|
"logps/rejected": -235.95108032226562, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1203254908323288, |
|
"rewards/margins": 0.7111250162124634, |
|
"rewards/rejected": -0.8314505815505981, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -2.3978214263916016, |
|
"logits/rejected": -2.358588218688965, |
|
"logps/chosen": -283.1036682128906, |
|
"logps/rejected": -232.8982391357422, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.09982401877641678, |
|
"rewards/margins": 0.7901795506477356, |
|
"rewards/rejected": -0.8900035619735718, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -2.3774914741516113, |
|
"logits/rejected": -2.3199660778045654, |
|
"logps/chosen": -270.4402160644531, |
|
"logps/rejected": -229.8076934814453, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.1376962959766388, |
|
"rewards/margins": 0.7224219441413879, |
|
"rewards/rejected": -0.8601182699203491, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.3702144622802734, |
|
"logits/rejected": -2.3372480869293213, |
|
"logps/chosen": -272.0224609375, |
|
"logps/rejected": -220.9506072998047, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.062098145484924316, |
|
"rewards/margins": 0.82757568359375, |
|
"rewards/rejected": -0.8896737098693848, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -2.3614370822906494, |
|
"logits/rejected": -2.3565754890441895, |
|
"logps/chosen": -270.15325927734375, |
|
"logps/rejected": -231.0701141357422, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.09890525788068771, |
|
"rewards/margins": 0.8027753829956055, |
|
"rewards/rejected": -0.901680588722229, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -2.379781484603882, |
|
"logits/rejected": -2.3308448791503906, |
|
"logps/chosen": -271.2726135253906, |
|
"logps/rejected": -247.5769805908203, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.13810952007770538, |
|
"rewards/margins": 0.7006896138191223, |
|
"rewards/rejected": -0.8387991189956665, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.4164352416992188, |
|
"logits/rejected": -2.363954782485962, |
|
"logps/chosen": -271.45989990234375, |
|
"logps/rejected": -234.0578155517578, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.17485982179641724, |
|
"rewards/margins": 0.7395257949829102, |
|
"rewards/rejected": -0.9143856167793274, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -2.429539918899536, |
|
"logits/rejected": -2.355285882949829, |
|
"logps/chosen": -284.6403503417969, |
|
"logps/rejected": -238.6908721923828, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11224106699228287, |
|
"rewards/margins": 0.7983044385910034, |
|
"rewards/rejected": -0.9105455279350281, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -2.368342876434326, |
|
"logits/rejected": -2.3081254959106445, |
|
"logps/chosen": -260.9881286621094, |
|
"logps/rejected": -239.78683471679688, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15342268347740173, |
|
"rewards/margins": 0.718788743019104, |
|
"rewards/rejected": -0.8722113370895386, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.3808670043945312, |
|
"logits/rejected": -2.32783842086792, |
|
"logps/chosen": -272.7002868652344, |
|
"logps/rejected": -221.1647186279297, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.1014653667807579, |
|
"rewards/margins": 0.7284099459648132, |
|
"rewards/rejected": -0.8298752903938293, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -2.453993320465088, |
|
"logits/rejected": -2.3969106674194336, |
|
"logps/chosen": -275.2949523925781, |
|
"logps/rejected": -238.8881378173828, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09556527435779572, |
|
"rewards/margins": 0.740602433681488, |
|
"rewards/rejected": -0.8361676931381226, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -2.3907103538513184, |
|
"logits/rejected": -2.350787878036499, |
|
"logps/chosen": -250.9322967529297, |
|
"logps/rejected": -234.1465606689453, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15345308184623718, |
|
"rewards/margins": 0.7323213815689087, |
|
"rewards/rejected": -0.8857744336128235, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.387080669403076, |
|
"logits/rejected": -2.35870623588562, |
|
"logps/chosen": -269.1571350097656, |
|
"logps/rejected": -229.3518524169922, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.12354423105716705, |
|
"rewards/margins": 0.7709532380104065, |
|
"rewards/rejected": -0.8944975137710571, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -2.398855209350586, |
|
"logits/rejected": -2.381904125213623, |
|
"logps/chosen": -263.2884521484375, |
|
"logps/rejected": -231.7559051513672, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.05873938649892807, |
|
"rewards/margins": 0.7153197526931763, |
|
"rewards/rejected": -0.7740591168403625, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -2.376080274581909, |
|
"logits/rejected": -2.316380739212036, |
|
"logps/chosen": -281.10455322265625, |
|
"logps/rejected": -218.64511108398438, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.09104409068822861, |
|
"rewards/margins": 0.7734732627868652, |
|
"rewards/rejected": -0.864517331123352, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.380017042160034, |
|
"logits/rejected": -2.3436522483825684, |
|
"logps/chosen": -273.26165771484375, |
|
"logps/rejected": -228.38821411132812, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13183800876140594, |
|
"rewards/margins": 0.7871755957603455, |
|
"rewards/rejected": -0.9190136194229126, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -2.355607509613037, |
|
"logits/rejected": -2.3353257179260254, |
|
"logps/chosen": -262.3599548339844, |
|
"logps/rejected": -226.27297973632812, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16399501264095306, |
|
"rewards/margins": 0.6985915899276733, |
|
"rewards/rejected": -0.8625866174697876, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -2.3956310749053955, |
|
"logits/rejected": -2.3475804328918457, |
|
"logps/chosen": -264.69793701171875, |
|
"logps/rejected": -217.0175323486328, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.12037453800439835, |
|
"rewards/margins": 0.7271707653999329, |
|
"rewards/rejected": -0.8475452661514282, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.327115297317505, |
|
"logits/rejected": -2.3179469108581543, |
|
"logps/chosen": -250.42251586914062, |
|
"logps/rejected": -225.97705078125, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1434091329574585, |
|
"rewards/margins": 0.6914544701576233, |
|
"rewards/rejected": -0.8348636627197266, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -2.4228968620300293, |
|
"logits/rejected": -2.358617067337036, |
|
"logps/chosen": -266.973388671875, |
|
"logps/rejected": -219.0054168701172, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.08964172005653381, |
|
"rewards/margins": 0.7375173568725586, |
|
"rewards/rejected": -0.8271591067314148, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -2.3782241344451904, |
|
"logits/rejected": -2.3420677185058594, |
|
"logps/chosen": -271.78472900390625, |
|
"logps/rejected": -224.3458251953125, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.07608253508806229, |
|
"rewards/margins": 0.8127967715263367, |
|
"rewards/rejected": -0.8888792991638184, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.362567901611328, |
|
"logits/rejected": -2.3487753868103027, |
|
"logps/chosen": -260.4725646972656, |
|
"logps/rejected": -230.2348175048828, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.754687488079071, |
|
"rewards/chosen": -0.11674080789089203, |
|
"rewards/margins": 0.8006342649459839, |
|
"rewards/rejected": -0.9173750877380371, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -2.368887186050415, |
|
"logits/rejected": -2.3095037937164307, |
|
"logps/chosen": -267.6027526855469, |
|
"logps/rejected": -227.1664276123047, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12747621536254883, |
|
"rewards/margins": 0.7349743843078613, |
|
"rewards/rejected": -0.8624505996704102, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -2.4295105934143066, |
|
"logits/rejected": -2.3712687492370605, |
|
"logps/chosen": -271.1334228515625, |
|
"logps/rejected": -226.98959350585938, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14556117355823517, |
|
"rewards/margins": 0.7037054896354675, |
|
"rewards/rejected": -0.8492666482925415, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.404041290283203, |
|
"logits/rejected": -2.3408515453338623, |
|
"logps/chosen": -271.45184326171875, |
|
"logps/rejected": -231.26318359375, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.754687488079071, |
|
"rewards/chosen": -0.15005668997764587, |
|
"rewards/margins": 0.7375911474227905, |
|
"rewards/rejected": -0.887647807598114, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -2.4113287925720215, |
|
"logits/rejected": -2.363337993621826, |
|
"logps/chosen": -279.56695556640625, |
|
"logps/rejected": -228.7524871826172, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": -0.18398186564445496, |
|
"rewards/margins": 0.6596510410308838, |
|
"rewards/rejected": -0.8436328768730164, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -2.4288249015808105, |
|
"logits/rejected": -2.3564791679382324, |
|
"logps/chosen": -271.6515808105469, |
|
"logps/rejected": -229.5021514892578, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.18123161792755127, |
|
"rewards/margins": 0.7020525932312012, |
|
"rewards/rejected": -0.8832842111587524, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.3590943813323975, |
|
"logits/rejected": -2.322199583053589, |
|
"logps/chosen": -273.1612854003906, |
|
"logps/rejected": -253.64633178710938, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.17031243443489075, |
|
"rewards/margins": 0.713148295879364, |
|
"rewards/rejected": -0.8834608197212219, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -2.3528659343719482, |
|
"logits/rejected": -2.3328776359558105, |
|
"logps/chosen": -256.59613037109375, |
|
"logps/rejected": -226.8491973876953, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.12790945172309875, |
|
"rewards/margins": 0.7292603254318237, |
|
"rewards/rejected": -0.8571697473526001, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -2.332599639892578, |
|
"logits/rejected": -2.328411340713501, |
|
"logps/chosen": -260.6733093261719, |
|
"logps/rejected": -226.01119995117188, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.1711007058620453, |
|
"rewards/margins": 0.6720742583274841, |
|
"rewards/rejected": -0.8431750535964966, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.3848772048950195, |
|
"logits/rejected": -2.346205949783325, |
|
"logps/chosen": -268.3501281738281, |
|
"logps/rejected": -224.84347534179688, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.13995392620563507, |
|
"rewards/margins": 0.7762855887413025, |
|
"rewards/rejected": -0.9162395596504211, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -2.38297438621521, |
|
"logits/rejected": -2.3261475563049316, |
|
"logps/chosen": -265.07781982421875, |
|
"logps/rejected": -244.471923828125, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13383716344833374, |
|
"rewards/margins": 0.6434152722358704, |
|
"rewards/rejected": -0.7772524952888489, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -2.3448472023010254, |
|
"logits/rejected": -2.3202641010284424, |
|
"logps/chosen": -266.0987854003906, |
|
"logps/rejected": -228.6033172607422, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1439387947320938, |
|
"rewards/margins": 0.7299402952194214, |
|
"rewards/rejected": -0.873879075050354, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.355379104614258, |
|
"logits/rejected": -2.3448832035064697, |
|
"logps/chosen": -268.690185546875, |
|
"logps/rejected": -234.4865264892578, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.17621631920337677, |
|
"rewards/margins": 0.6596941351890564, |
|
"rewards/rejected": -0.835910439491272, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -2.355900287628174, |
|
"logits/rejected": -2.32261061668396, |
|
"logps/chosen": -264.06536865234375, |
|
"logps/rejected": -232.2172088623047, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11503295600414276, |
|
"rewards/margins": 0.7390708327293396, |
|
"rewards/rejected": -0.8541038632392883, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -2.380585193634033, |
|
"logits/rejected": -2.325172185897827, |
|
"logps/chosen": -271.6562805175781, |
|
"logps/rejected": -234.0508575439453, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.16687723994255066, |
|
"rewards/margins": 0.6961434483528137, |
|
"rewards/rejected": -0.8630207180976868, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.3646774291992188, |
|
"logits/rejected": -2.3574256896972656, |
|
"logps/chosen": -282.1201171875, |
|
"logps/rejected": -234.2088165283203, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.048953305929899216, |
|
"rewards/margins": 0.788312554359436, |
|
"rewards/rejected": -0.8372658491134644, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -2.371241569519043, |
|
"logits/rejected": -2.355045795440674, |
|
"logps/chosen": -280.1076965332031, |
|
"logps/rejected": -234.8966522216797, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.15984012186527252, |
|
"rewards/margins": 0.6876562833786011, |
|
"rewards/rejected": -0.8474963903427124, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -2.360264778137207, |
|
"logits/rejected": -2.332968235015869, |
|
"logps/chosen": -278.0101013183594, |
|
"logps/rejected": -239.9487762451172, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.13736246526241302, |
|
"rewards/margins": 0.6766383051872253, |
|
"rewards/rejected": -0.8140007853507996, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.400036334991455, |
|
"logits/rejected": -2.3746438026428223, |
|
"logps/chosen": -267.7570495605469, |
|
"logps/rejected": -229.16140747070312, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.12042717635631561, |
|
"rewards/margins": 0.7909914255142212, |
|
"rewards/rejected": -0.9114185571670532, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -2.3523342609405518, |
|
"logits/rejected": -2.3188953399658203, |
|
"logps/chosen": -260.3684387207031, |
|
"logps/rejected": -233.06326293945312, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.15820932388305664, |
|
"rewards/margins": 0.7365429997444153, |
|
"rewards/rejected": -0.8947523236274719, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -2.3432793617248535, |
|
"logits/rejected": -2.33192777633667, |
|
"logps/chosen": -278.02117919921875, |
|
"logps/rejected": -233.4646453857422, |
|
"loss": 0.5247, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.08122755587100983, |
|
"rewards/margins": 0.7956343293190002, |
|
"rewards/rejected": -0.8768618702888489, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.4073646068573, |
|
"logits/rejected": -2.375094175338745, |
|
"logps/chosen": -280.04608154296875, |
|
"logps/rejected": -233.2005615234375, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.13482233881950378, |
|
"rewards/margins": 0.7431889772415161, |
|
"rewards/rejected": -0.8780113458633423, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -2.374481439590454, |
|
"logits/rejected": -2.320697784423828, |
|
"logps/chosen": -255.5072784423828, |
|
"logps/rejected": -207.7611083984375, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14782702922821045, |
|
"rewards/margins": 0.7393444180488586, |
|
"rewards/rejected": -0.8871713876724243, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -2.381277561187744, |
|
"logits/rejected": -2.313739061355591, |
|
"logps/chosen": -267.82568359375, |
|
"logps/rejected": -234.2742156982422, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10935642570257187, |
|
"rewards/margins": 0.7639234662055969, |
|
"rewards/rejected": -0.873279869556427, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.0344715118408203, |
|
"eval_logits/rejected": -1.9804012775421143, |
|
"eval_logps/chosen": -265.97662353515625, |
|
"eval_logps/rejected": -232.47203063964844, |
|
"eval_loss": 0.5272051095962524, |
|
"eval_rewards/accuracies": 0.734000027179718, |
|
"eval_rewards/chosen": -0.1408846527338028, |
|
"eval_rewards/margins": 0.7409887909889221, |
|
"eval_rewards/rejected": -0.8818734884262085, |
|
"eval_runtime": 1090.2134, |
|
"eval_samples_per_second": 1.835, |
|
"eval_steps_per_second": 0.459, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5639242924154626, |
|
"train_runtime": 165279.5111, |
|
"train_samples_per_second": 1.125, |
|
"train_steps_per_second": 0.018 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|