|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1911, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6041666666666664e-09, |
|
"logits/chosen": -2.463043451309204, |
|
"logits/rejected": -2.288743019104004, |
|
"logps/chosen": -301.1433410644531, |
|
"logps/rejected": -128.25608825683594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -2.5560247898101807, |
|
"logits/rejected": -2.555253028869629, |
|
"logps/chosen": -286.1558837890625, |
|
"logps/rejected": -256.28131103515625, |
|
"loss": 0.697, |
|
"rewards/accuracies": 0.3333333432674408, |
|
"rewards/chosen": -0.0025797931011766195, |
|
"rewards/margins": -0.008782301098108292, |
|
"rewards/rejected": 0.006202507298439741, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.648404598236084, |
|
"logits/rejected": -2.6275510787963867, |
|
"logps/chosen": -316.64373779296875, |
|
"logps/rejected": -314.99212646484375, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0072450353763997555, |
|
"rewards/margins": 0.0013799279695376754, |
|
"rewards/rejected": -0.008624963462352753, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": -2.683773994445801, |
|
"logits/rejected": -2.550048828125, |
|
"logps/chosen": -306.83282470703125, |
|
"logps/rejected": -266.6906433105469, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.011949767358601093, |
|
"rewards/margins": 0.020141970366239548, |
|
"rewards/rejected": -0.008192205801606178, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.634948968887329, |
|
"logits/rejected": -2.5988709926605225, |
|
"logps/chosen": -265.98968505859375, |
|
"logps/rejected": -252.77835083007812, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.08117702603340149, |
|
"rewards/margins": 0.05083204060792923, |
|
"rewards/rejected": 0.03034498728811741, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3020833333333334e-07, |
|
"logits/chosen": -2.644986152648926, |
|
"logits/rejected": -2.589542865753174, |
|
"logps/chosen": -323.46881103515625, |
|
"logps/rejected": -284.9610900878906, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2217252552509308, |
|
"rewards/margins": 0.12405480444431305, |
|
"rewards/rejected": 0.09767045080661774, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.599961280822754, |
|
"logits/rejected": -2.491401195526123, |
|
"logps/chosen": -305.62152099609375, |
|
"logps/rejected": -236.73916625976562, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.29835107922554016, |
|
"rewards/margins": 0.23799380660057068, |
|
"rewards/rejected": 0.06035725399851799, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8229166666666666e-07, |
|
"logits/chosen": -2.5403127670288086, |
|
"logits/rejected": -2.491334915161133, |
|
"logps/chosen": -281.8306884765625, |
|
"logps/rejected": -264.59130859375, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.37535926699638367, |
|
"rewards/margins": 0.173495814204216, |
|
"rewards/rejected": 0.20186343789100647, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.6033613681793213, |
|
"logits/rejected": -2.548762083053589, |
|
"logps/chosen": -270.06597900390625, |
|
"logps/rejected": -262.29693603515625, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4671238958835602, |
|
"rewards/margins": 0.144499734044075, |
|
"rewards/rejected": 0.3226241171360016, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": -2.5922348499298096, |
|
"logits/rejected": -2.584137439727783, |
|
"logps/chosen": -278.80865478515625, |
|
"logps/rejected": -279.4120178222656, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.295165479183197, |
|
"rewards/margins": 0.24514034390449524, |
|
"rewards/rejected": 0.05002513527870178, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.5200092792510986, |
|
"logits/rejected": -2.4520034790039062, |
|
"logps/chosen": -240.3234405517578, |
|
"logps/rejected": -248.67428588867188, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4579353332519531, |
|
"rewards/margins": 0.31523841619491577, |
|
"rewards/rejected": 0.14269694685935974, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.864583333333333e-07, |
|
"logits/chosen": -2.5587222576141357, |
|
"logits/rejected": -2.5158984661102295, |
|
"logps/chosen": -334.97784423828125, |
|
"logps/rejected": -290.39935302734375, |
|
"loss": 0.5736, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.39039498567581177, |
|
"rewards/margins": 0.49432238936424255, |
|
"rewards/rejected": -0.10392741858959198, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.5714714527130127, |
|
"logits/rejected": -2.503490924835205, |
|
"logps/chosen": -287.0711364746094, |
|
"logps/rejected": -279.7371520996094, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7893859148025513, |
|
"rewards/margins": 0.6542637944221497, |
|
"rewards/rejected": 0.13512210547924042, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3854166666666667e-07, |
|
"logits/chosen": -2.6581666469573975, |
|
"logits/rejected": -2.558701753616333, |
|
"logps/chosen": -318.14178466796875, |
|
"logps/rejected": -277.52838134765625, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3024521768093109, |
|
"rewards/margins": 0.6436794400215149, |
|
"rewards/rejected": -0.3412272334098816, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.6539013385772705, |
|
"logits/rejected": -2.6091341972351074, |
|
"logps/chosen": -307.194091796875, |
|
"logps/rejected": -285.154541015625, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1616998016834259, |
|
"rewards/margins": 0.4194749891757965, |
|
"rewards/rejected": -0.5811747908592224, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": -2.4873158931732178, |
|
"logits/rejected": -2.504676342010498, |
|
"logps/chosen": -300.03448486328125, |
|
"logps/rejected": -318.03668212890625, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03602635860443115, |
|
"rewards/margins": -0.06406474858522415, |
|
"rewards/rejected": 0.028038373216986656, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.6210286617279053, |
|
"logits/rejected": -2.6042556762695312, |
|
"logps/chosen": -286.2755432128906, |
|
"logps/rejected": -299.8374328613281, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1890409290790558, |
|
"rewards/margins": 0.8602690696716309, |
|
"rewards/rejected": -0.6712281703948975, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.427083333333333e-07, |
|
"logits/chosen": -2.663649320602417, |
|
"logits/rejected": -2.611992835998535, |
|
"logps/chosen": -312.26580810546875, |
|
"logps/rejected": -271.50897216796875, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.08057786524295807, |
|
"rewards/margins": 0.7988370656967163, |
|
"rewards/rejected": -0.8794149160385132, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.6343648433685303, |
|
"logits/rejected": -2.5704476833343506, |
|
"logps/chosen": -338.59124755859375, |
|
"logps/rejected": -286.7225341796875, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.006776231341063976, |
|
"rewards/margins": 0.9450720548629761, |
|
"rewards/rejected": -0.9518482089042664, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.947916666666667e-07, |
|
"logits/chosen": -2.6279683113098145, |
|
"logits/rejected": -2.5350871086120605, |
|
"logps/chosen": -288.70184326171875, |
|
"logps/rejected": -262.1229248046875, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2994307279586792, |
|
"rewards/margins": 0.9815131425857544, |
|
"rewards/rejected": -0.6820824146270752, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976730657358929e-07, |
|
"logits/chosen": -2.5379586219787598, |
|
"logits/rejected": -2.49354887008667, |
|
"logps/chosen": -253.19580078125, |
|
"logps/rejected": -223.9005584716797, |
|
"loss": 0.8012, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20366668701171875, |
|
"rewards/margins": -0.2523919641971588, |
|
"rewards/rejected": 0.04872531443834305, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.947643979057592e-07, |
|
"logits/chosen": -2.543163299560547, |
|
"logits/rejected": -2.5506227016448975, |
|
"logps/chosen": -285.0458984375, |
|
"logps/rejected": -312.3334045410156, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.23225872218608856, |
|
"rewards/margins": 0.626153826713562, |
|
"rewards/rejected": -0.39389508962631226, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918557300756254e-07, |
|
"logits/chosen": -2.4932289123535156, |
|
"logits/rejected": -2.464277744293213, |
|
"logps/chosen": -292.6415100097656, |
|
"logps/rejected": -273.45648193359375, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0871698409318924, |
|
"rewards/margins": 0.7229372262954712, |
|
"rewards/rejected": -0.6357674598693848, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.889470622454916e-07, |
|
"logits/chosen": -2.579195261001587, |
|
"logits/rejected": -2.464480400085449, |
|
"logps/chosen": -310.99090576171875, |
|
"logps/rejected": -287.1470031738281, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.05043240636587143, |
|
"rewards/margins": 0.7342410087585449, |
|
"rewards/rejected": -0.6838085651397705, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.860383944153578e-07, |
|
"logits/chosen": -2.4707000255584717, |
|
"logits/rejected": -2.3995609283447266, |
|
"logps/chosen": -292.8463439941406, |
|
"logps/rejected": -262.3541259765625, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05063791200518608, |
|
"rewards/margins": 0.8500161170959473, |
|
"rewards/rejected": -0.9006540179252625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.83129726585224e-07, |
|
"logits/chosen": -2.4374887943267822, |
|
"logits/rejected": -2.4220387935638428, |
|
"logps/chosen": -266.37774658203125, |
|
"logps/rejected": -283.33978271484375, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.49987784028053284, |
|
"rewards/margins": 0.5630615949630737, |
|
"rewards/rejected": -1.0629395246505737, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.802210587550902e-07, |
|
"logits/chosen": -2.405151844024658, |
|
"logits/rejected": -2.373051643371582, |
|
"logps/chosen": -323.4302062988281, |
|
"logps/rejected": -306.20367431640625, |
|
"loss": 0.7053, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.31794676184654236, |
|
"rewards/margins": 0.897225558757782, |
|
"rewards/rejected": -1.215172290802002, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.773123909249563e-07, |
|
"logits/chosen": -2.432044506072998, |
|
"logits/rejected": -2.363980770111084, |
|
"logps/chosen": -278.9121398925781, |
|
"logps/rejected": -302.1050109863281, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.047296181321144104, |
|
"rewards/margins": 0.663731038570404, |
|
"rewards/rejected": -0.711027204990387, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7440372309482255e-07, |
|
"logits/chosen": -2.437777042388916, |
|
"logits/rejected": -2.3604378700256348, |
|
"logps/chosen": -301.4665222167969, |
|
"logps/rejected": -265.2036437988281, |
|
"loss": 0.7178, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1992354393005371, |
|
"rewards/margins": 0.936697781085968, |
|
"rewards/rejected": -1.13593327999115, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7149505526468876e-07, |
|
"logits/chosen": -2.397371530532837, |
|
"logits/rejected": -2.3492469787597656, |
|
"logps/chosen": -296.2301940917969, |
|
"logps/rejected": -297.2955322265625, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.45999306440353394, |
|
"rewards/margins": 0.6742507815361023, |
|
"rewards/rejected": -0.21425779163837433, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6858638743455497e-07, |
|
"logits/chosen": -2.4259443283081055, |
|
"logits/rejected": -2.3763365745544434, |
|
"logps/chosen": -349.0486755371094, |
|
"logps/rejected": -336.35894775390625, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4317784905433655, |
|
"rewards/margins": 0.9294607043266296, |
|
"rewards/rejected": -0.49768227338790894, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.656777196044212e-07, |
|
"logits/chosen": -2.4149937629699707, |
|
"logits/rejected": -2.299372911453247, |
|
"logps/chosen": -296.93731689453125, |
|
"logps/rejected": -269.88836669921875, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.13364040851593018, |
|
"rewards/margins": 1.1540381908416748, |
|
"rewards/rejected": -1.020397663116455, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.627690517742874e-07, |
|
"logits/chosen": -2.3246355056762695, |
|
"logits/rejected": -2.31697940826416, |
|
"logps/chosen": -246.38571166992188, |
|
"logps/rejected": -243.2635955810547, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.3853219151496887, |
|
"rewards/margins": 1.1966570615768433, |
|
"rewards/rejected": -0.8113352060317993, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5986038394415354e-07, |
|
"logits/chosen": -2.377342939376831, |
|
"logits/rejected": -2.266279697418213, |
|
"logps/chosen": -291.89923095703125, |
|
"logps/rejected": -252.13156127929688, |
|
"loss": 0.7187, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.3700163662433624, |
|
"rewards/margins": 1.021303415298462, |
|
"rewards/rejected": -1.3913196325302124, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.569517161140198e-07, |
|
"logits/chosen": -2.4199347496032715, |
|
"logits/rejected": -2.4432175159454346, |
|
"logps/chosen": -294.2767333984375, |
|
"logps/rejected": -317.6234130859375, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.04284637048840523, |
|
"rewards/margins": 0.6986137628555298, |
|
"rewards/rejected": -0.6557673811912537, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5404304828388595e-07, |
|
"logits/chosen": -2.488062858581543, |
|
"logits/rejected": -2.420581340789795, |
|
"logps/chosen": -316.81109619140625, |
|
"logps/rejected": -355.86297607421875, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16860897839069366, |
|
"rewards/margins": 1.251904010772705, |
|
"rewards/rejected": -1.4205129146575928, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5113438045375216e-07, |
|
"logits/chosen": -2.372753858566284, |
|
"logits/rejected": -2.3147406578063965, |
|
"logps/chosen": -287.305419921875, |
|
"logps/rejected": -256.7542724609375, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.243671253323555, |
|
"rewards/margins": 0.9904806017875671, |
|
"rewards/rejected": -0.7468093633651733, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4822571262361837e-07, |
|
"logits/chosen": -2.446676731109619, |
|
"logits/rejected": -2.414977550506592, |
|
"logps/chosen": -242.71127319335938, |
|
"logps/rejected": -246.08462524414062, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.48434001207351685, |
|
"rewards/margins": 0.3427623510360718, |
|
"rewards/rejected": -0.8271023631095886, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.453170447934846e-07, |
|
"logits/chosen": -2.3129241466522217, |
|
"logits/rejected": -2.2660763263702393, |
|
"logps/chosen": -325.49517822265625, |
|
"logps/rejected": -257.01812744140625, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7354428768157959, |
|
"rewards/margins": 0.5178254842758179, |
|
"rewards/rejected": -1.2532682418823242, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.424083769633508e-07, |
|
"logits/chosen": -2.407226085662842, |
|
"logits/rejected": -2.3878862857818604, |
|
"logps/chosen": -321.883056640625, |
|
"logps/rejected": -329.2529296875, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6276373863220215, |
|
"rewards/margins": 0.8156329989433289, |
|
"rewards/rejected": -1.4432705640792847, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.39499709133217e-07, |
|
"logits/chosen": -2.5650746822357178, |
|
"logits/rejected": -2.5109097957611084, |
|
"logps/chosen": -315.965576171875, |
|
"logps/rejected": -329.1568298339844, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22043700516223907, |
|
"rewards/margins": 0.7228590250015259, |
|
"rewards/rejected": -0.9432960748672485, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3659104130308314e-07, |
|
"logits/chosen": -2.3766233921051025, |
|
"logits/rejected": -2.342545747756958, |
|
"logps/chosen": -270.418701171875, |
|
"logps/rejected": -277.3287048339844, |
|
"loss": 0.4702, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.33475637435913086, |
|
"rewards/margins": 1.0519344806671143, |
|
"rewards/rejected": -1.3866908550262451, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.336823734729494e-07, |
|
"logits/chosen": -2.5363078117370605, |
|
"logits/rejected": -2.420921802520752, |
|
"logps/chosen": -339.0617370605469, |
|
"logps/rejected": -314.2167663574219, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.20719392597675323, |
|
"rewards/margins": 1.039120078086853, |
|
"rewards/rejected": -1.2463139295578003, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.3077370564281556e-07, |
|
"logits/chosen": -2.4507744312286377, |
|
"logits/rejected": -2.3617775440216064, |
|
"logps/chosen": -305.3289794921875, |
|
"logps/rejected": -270.58843994140625, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5011049509048462, |
|
"rewards/margins": 0.8858562707901001, |
|
"rewards/rejected": -1.3869613409042358, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278650378126818e-07, |
|
"logits/chosen": -2.327728033065796, |
|
"logits/rejected": -2.2920546531677246, |
|
"logps/chosen": -289.3099365234375, |
|
"logps/rejected": -333.728271484375, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1836535632610321, |
|
"rewards/margins": 1.081466794013977, |
|
"rewards/rejected": -0.8978131413459778, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.24956369982548e-07, |
|
"logits/chosen": -2.4250426292419434, |
|
"logits/rejected": -2.3737356662750244, |
|
"logps/chosen": -308.11236572265625, |
|
"logps/rejected": -295.8037109375, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.22618088126182556, |
|
"rewards/margins": 0.990483283996582, |
|
"rewards/rejected": -0.7643024325370789, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.220477021524142e-07, |
|
"logits/chosen": -2.4373867511749268, |
|
"logits/rejected": -2.400118112564087, |
|
"logps/chosen": -290.0523681640625, |
|
"logps/rejected": -255.9022674560547, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.12436334788799286, |
|
"rewards/margins": 0.7458918690681458, |
|
"rewards/rejected": -0.6215284466743469, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.191390343222804e-07, |
|
"logits/chosen": -2.506086826324463, |
|
"logits/rejected": -2.4103667736053467, |
|
"logps/chosen": -305.6959533691406, |
|
"logps/rejected": -305.922607421875, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.27497127652168274, |
|
"rewards/margins": 1.060005784034729, |
|
"rewards/rejected": -0.7850344181060791, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.162303664921466e-07, |
|
"logits/chosen": -2.460691213607788, |
|
"logits/rejected": -2.3934152126312256, |
|
"logps/chosen": -310.1314697265625, |
|
"logps/rejected": -297.3235168457031, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.14773890376091003, |
|
"rewards/margins": 0.7825302481651306, |
|
"rewards/rejected": -0.6347913146018982, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1332169866201275e-07, |
|
"logits/chosen": -2.4345953464508057, |
|
"logits/rejected": -2.4074747562408447, |
|
"logps/chosen": -290.62579345703125, |
|
"logps/rejected": -277.2015380859375, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5942241549491882, |
|
"rewards/margins": 0.8701769113540649, |
|
"rewards/rejected": -0.2759527266025543, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.10413030831879e-07, |
|
"logits/chosen": -2.491701602935791, |
|
"logits/rejected": -2.3788347244262695, |
|
"logps/chosen": -327.69097900390625, |
|
"logps/rejected": -256.1454162597656, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3082984387874603, |
|
"rewards/margins": 0.7658659219741821, |
|
"rewards/rejected": -0.4575675129890442, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.4869401454925537, |
|
"eval_logits/rejected": -2.4248592853546143, |
|
"eval_logps/chosen": -292.5533447265625, |
|
"eval_logps/rejected": -277.5867614746094, |
|
"eval_loss": 0.6311370134353638, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": 0.8113744854927063, |
|
"eval_rewards/margins": 0.9294369220733643, |
|
"eval_rewards/rejected": -0.11806251108646393, |
|
"eval_runtime": 615.8679, |
|
"eval_samples_per_second": 3.247, |
|
"eval_steps_per_second": 0.102, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0750436300174517e-07, |
|
"logits/chosen": -2.397420883178711, |
|
"logits/rejected": -2.372736692428589, |
|
"logps/chosen": -287.88897705078125, |
|
"logps/rejected": -266.78594970703125, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6826907396316528, |
|
"rewards/margins": 1.225263237953186, |
|
"rewards/rejected": -0.542572557926178, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0459569517161143e-07, |
|
"logits/chosen": -2.448612928390503, |
|
"logits/rejected": -2.335453748703003, |
|
"logps/chosen": -289.3345947265625, |
|
"logps/rejected": -280.9012145996094, |
|
"loss": 0.9872, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.408169686794281, |
|
"rewards/margins": 1.0372252464294434, |
|
"rewards/rejected": -0.6290556192398071, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.016870273414776e-07, |
|
"logits/chosen": -2.4662892818450928, |
|
"logits/rejected": -2.425401449203491, |
|
"logps/chosen": -260.6138000488281, |
|
"logps/rejected": -284.0682373046875, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2825961709022522, |
|
"rewards/margins": 1.0750539302825928, |
|
"rewards/rejected": -0.7924576997756958, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.987783595113438e-07, |
|
"logits/chosen": -2.498586416244507, |
|
"logits/rejected": -2.421823501586914, |
|
"logps/chosen": -339.8152160644531, |
|
"logps/rejected": -266.1904602050781, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.5165579915046692, |
|
"rewards/margins": 1.7178938388824463, |
|
"rewards/rejected": -1.2013359069824219, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9586969168121e-07, |
|
"logits/chosen": -2.491562604904175, |
|
"logits/rejected": -2.4570322036743164, |
|
"logps/chosen": -348.4375, |
|
"logps/rejected": -306.64697265625, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.10518566519021988, |
|
"rewards/margins": 0.8593299984931946, |
|
"rewards/rejected": -0.7541443705558777, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.929610238510762e-07, |
|
"logits/chosen": -2.3990979194641113, |
|
"logits/rejected": -2.39212965965271, |
|
"logps/chosen": -289.9490661621094, |
|
"logps/rejected": -269.9928894042969, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04818814992904663, |
|
"rewards/margins": 0.6179074048995972, |
|
"rewards/rejected": -0.569719135761261, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.900523560209424e-07, |
|
"logits/chosen": -2.4815762042999268, |
|
"logits/rejected": -2.425726890563965, |
|
"logps/chosen": -312.6693420410156, |
|
"logps/rejected": -273.46728515625, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.3141094148159027, |
|
"rewards/margins": 1.199885606765747, |
|
"rewards/rejected": -0.8857762217521667, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.871436881908086e-07, |
|
"logits/chosen": -2.5039753913879395, |
|
"logits/rejected": -2.414092779159546, |
|
"logps/chosen": -284.1502990722656, |
|
"logps/rejected": -240.26083374023438, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05462036281824112, |
|
"rewards/margins": 0.9068329930305481, |
|
"rewards/rejected": -0.8522126078605652, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.842350203606748e-07, |
|
"logits/chosen": -2.487733840942383, |
|
"logits/rejected": -2.4456124305725098, |
|
"logps/chosen": -264.53790283203125, |
|
"logps/rejected": -287.30718994140625, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.48793086409568787, |
|
"rewards/margins": 1.0478885173797607, |
|
"rewards/rejected": -1.535819411277771, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8132635253054103e-07, |
|
"logits/chosen": -2.4525985717773438, |
|
"logits/rejected": -2.419036865234375, |
|
"logps/chosen": -292.17840576171875, |
|
"logps/rejected": -305.83258056640625, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03021824359893799, |
|
"rewards/margins": 1.0404767990112305, |
|
"rewards/rejected": -1.070695161819458, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.784176847004072e-07, |
|
"logits/chosen": -2.494528293609619, |
|
"logits/rejected": -2.455793857574463, |
|
"logps/chosen": -276.6438903808594, |
|
"logps/rejected": -284.5893249511719, |
|
"loss": 0.4859, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.11220204830169678, |
|
"rewards/margins": 1.3604462146759033, |
|
"rewards/rejected": -1.2482441663742065, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.755090168702734e-07, |
|
"logits/chosen": -2.4499683380126953, |
|
"logits/rejected": -2.4269161224365234, |
|
"logps/chosen": -319.4291076660156, |
|
"logps/rejected": -330.466552734375, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1047038584947586, |
|
"rewards/margins": 1.4205875396728516, |
|
"rewards/rejected": -1.5252914428710938, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.726003490401396e-07, |
|
"logits/chosen": -2.4178521633148193, |
|
"logits/rejected": -2.3652777671813965, |
|
"logps/chosen": -301.78399658203125, |
|
"logps/rejected": -255.4466094970703, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.73511803150177, |
|
"rewards/margins": 0.9625850915908813, |
|
"rewards/rejected": -1.6977031230926514, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.696916812100058e-07, |
|
"logits/chosen": -2.414977788925171, |
|
"logits/rejected": -2.2954540252685547, |
|
"logps/chosen": -309.4778747558594, |
|
"logps/rejected": -232.9076385498047, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.36078378558158875, |
|
"rewards/margins": 1.096639633178711, |
|
"rewards/rejected": -1.457423448562622, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.66783013379872e-07, |
|
"logits/chosen": -2.4305996894836426, |
|
"logits/rejected": -2.3547043800354004, |
|
"logps/chosen": -303.08221435546875, |
|
"logps/rejected": -249.44003295898438, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.32232993841171265, |
|
"rewards/margins": 1.2769782543182373, |
|
"rewards/rejected": -1.5993082523345947, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6387434554973823e-07, |
|
"logits/chosen": -2.4183077812194824, |
|
"logits/rejected": -2.3758342266082764, |
|
"logps/chosen": -326.63739013671875, |
|
"logps/rejected": -319.60357666015625, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7344697117805481, |
|
"rewards/margins": 1.0054800510406494, |
|
"rewards/rejected": -1.7399498224258423, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.609656777196044e-07, |
|
"logits/chosen": -2.4492297172546387, |
|
"logits/rejected": -2.3460309505462646, |
|
"logps/chosen": -334.344970703125, |
|
"logps/rejected": -272.79498291015625, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6380278468132019, |
|
"rewards/margins": 1.2398582696914673, |
|
"rewards/rejected": -1.877886414527893, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5805700988947064e-07, |
|
"logits/chosen": -2.37317156791687, |
|
"logits/rejected": -2.287410259246826, |
|
"logps/chosen": -259.41107177734375, |
|
"logps/rejected": -246.9707794189453, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.004200550727546215, |
|
"rewards/margins": 1.685315728187561, |
|
"rewards/rejected": -1.689516305923462, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.551483420593368e-07, |
|
"logits/chosen": -2.424691677093506, |
|
"logits/rejected": -2.3706610202789307, |
|
"logps/chosen": -337.8548889160156, |
|
"logps/rejected": -300.6462707519531, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4729352593421936, |
|
"rewards/margins": 1.0950307846069336, |
|
"rewards/rejected": -1.567966103553772, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5223967422920306e-07, |
|
"logits/chosen": -2.4483962059020996, |
|
"logits/rejected": -2.3456740379333496, |
|
"logps/chosen": -355.38446044921875, |
|
"logps/rejected": -313.3027038574219, |
|
"loss": 0.7153, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5108211636543274, |
|
"rewards/margins": 1.1368777751922607, |
|
"rewards/rejected": -1.647698998451233, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.493310063990692e-07, |
|
"logits/chosen": -2.4961283206939697, |
|
"logits/rejected": -2.4019272327423096, |
|
"logps/chosen": -258.533447265625, |
|
"logps/rejected": -235.88912963867188, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2880368232727051, |
|
"rewards/margins": 0.940460205078125, |
|
"rewards/rejected": -1.22849702835083, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.464223385689354e-07, |
|
"logits/chosen": -2.4469172954559326, |
|
"logits/rejected": -2.442150354385376, |
|
"logps/chosen": -263.5059814453125, |
|
"logps/rejected": -278.46112060546875, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03521687909960747, |
|
"rewards/margins": 1.297978162765503, |
|
"rewards/rejected": -1.2627613544464111, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4351367073880163e-07, |
|
"logits/chosen": -2.567368745803833, |
|
"logits/rejected": -2.5503649711608887, |
|
"logps/chosen": -263.9495544433594, |
|
"logps/rejected": -274.8172302246094, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18824487924575806, |
|
"rewards/margins": 1.1973415613174438, |
|
"rewards/rejected": -1.3855865001678467, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4060500290866783e-07, |
|
"logits/chosen": -2.5890707969665527, |
|
"logits/rejected": -2.497729539871216, |
|
"logps/chosen": -357.546875, |
|
"logps/rejected": -316.70361328125, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6199100017547607, |
|
"rewards/margins": 1.5381364822387695, |
|
"rewards/rejected": -2.158046245574951, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.37696335078534e-07, |
|
"logits/chosen": -2.6034646034240723, |
|
"logits/rejected": -2.5190954208374023, |
|
"logps/chosen": -307.06573486328125, |
|
"logps/rejected": -264.7110900878906, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9259228706359863, |
|
"rewards/margins": 1.1956923007965088, |
|
"rewards/rejected": -2.121615409851074, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3478766724840025e-07, |
|
"logits/chosen": -2.5817999839782715, |
|
"logits/rejected": -2.532043695449829, |
|
"logps/chosen": -291.2391662597656, |
|
"logps/rejected": -286.722900390625, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.822766900062561, |
|
"rewards/margins": 1.2887961864471436, |
|
"rewards/rejected": -2.111563205718994, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.318789994182664e-07, |
|
"logits/chosen": -2.596311092376709, |
|
"logits/rejected": -2.5423972606658936, |
|
"logps/chosen": -271.95330810546875, |
|
"logps/rejected": -298.80975341796875, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4673972725868225, |
|
"rewards/margins": 1.1896220445632935, |
|
"rewards/rejected": -1.6570192575454712, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2897033158813266e-07, |
|
"logits/chosen": -2.5218849182128906, |
|
"logits/rejected": -2.5217957496643066, |
|
"logps/chosen": -309.8044738769531, |
|
"logps/rejected": -282.1851806640625, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8506789207458496, |
|
"rewards/margins": 0.7243373990058899, |
|
"rewards/rejected": -1.5750162601470947, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.260616637579988e-07, |
|
"logits/chosen": -2.621936082839966, |
|
"logits/rejected": -2.5529212951660156, |
|
"logps/chosen": -321.58624267578125, |
|
"logps/rejected": -306.6531677246094, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5436080694198608, |
|
"rewards/margins": 1.283302903175354, |
|
"rewards/rejected": -1.8269107341766357, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2315299592786503e-07, |
|
"logits/chosen": -2.537576913833618, |
|
"logits/rejected": -2.497835636138916, |
|
"logps/chosen": -290.63714599609375, |
|
"logps/rejected": -284.85650634765625, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6990116834640503, |
|
"rewards/margins": 1.495788812637329, |
|
"rewards/rejected": -2.194800615310669, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2024432809773123e-07, |
|
"logits/chosen": -2.486382246017456, |
|
"logits/rejected": -2.403653860092163, |
|
"logps/chosen": -286.6750183105469, |
|
"logps/rejected": -267.35723876953125, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0650520324707031, |
|
"rewards/margins": 1.3269193172454834, |
|
"rewards/rejected": -2.3919713497161865, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1733566026759744e-07, |
|
"logits/chosen": -2.5125479698181152, |
|
"logits/rejected": -2.461850643157959, |
|
"logps/chosen": -289.32379150390625, |
|
"logps/rejected": -280.98785400390625, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6875102519989014, |
|
"rewards/margins": 1.0723689794540405, |
|
"rewards/rejected": -1.7598793506622314, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.144269924374636e-07, |
|
"logits/chosen": -2.495903491973877, |
|
"logits/rejected": -2.4473767280578613, |
|
"logps/chosen": -277.972900390625, |
|
"logps/rejected": -305.8498840332031, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6580140590667725, |
|
"rewards/margins": 0.9823113679885864, |
|
"rewards/rejected": -1.6403253078460693, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1151832460732986e-07, |
|
"logits/chosen": -2.4640355110168457, |
|
"logits/rejected": -2.3490617275238037, |
|
"logps/chosen": -321.24688720703125, |
|
"logps/rejected": -300.0312805175781, |
|
"loss": 0.7062, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7375732660293579, |
|
"rewards/margins": 1.0734766721725464, |
|
"rewards/rejected": -1.8110501766204834, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.08609656777196e-07, |
|
"logits/chosen": -2.4589717388153076, |
|
"logits/rejected": -2.4391770362854004, |
|
"logps/chosen": -276.55450439453125, |
|
"logps/rejected": -291.0248107910156, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9284421801567078, |
|
"rewards/margins": 1.0043559074401855, |
|
"rewards/rejected": -1.9327980279922485, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0570098894706227e-07, |
|
"logits/chosen": -2.4961347579956055, |
|
"logits/rejected": -2.4345571994781494, |
|
"logps/chosen": -305.3291015625, |
|
"logps/rejected": -354.2350769042969, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7021960616111755, |
|
"rewards/margins": 1.1666209697723389, |
|
"rewards/rejected": -1.8688170909881592, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0279232111692843e-07, |
|
"logits/chosen": -2.366995096206665, |
|
"logits/rejected": -2.3412668704986572, |
|
"logps/chosen": -269.6289978027344, |
|
"logps/rejected": -294.22308349609375, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0025119720958173275, |
|
"rewards/margins": 1.3077478408813477, |
|
"rewards/rejected": -1.3102598190307617, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9988365328679463e-07, |
|
"logits/chosen": -2.5405781269073486, |
|
"logits/rejected": -2.5027382373809814, |
|
"logps/chosen": -310.4794006347656, |
|
"logps/rejected": -311.45599365234375, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.14495661854743958, |
|
"rewards/margins": 1.4426281452178955, |
|
"rewards/rejected": -1.5875847339630127, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9697498545666084e-07, |
|
"logits/chosen": -2.406571388244629, |
|
"logits/rejected": -2.3264360427856445, |
|
"logps/chosen": -347.81866455078125, |
|
"logps/rejected": -290.57080078125, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.660466730594635, |
|
"rewards/margins": 1.2641503810882568, |
|
"rewards/rejected": -1.924617052078247, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9406631762652705e-07, |
|
"logits/chosen": -2.4709229469299316, |
|
"logits/rejected": -2.382078170776367, |
|
"logps/chosen": -340.4996032714844, |
|
"logps/rejected": -315.2070617675781, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6704212427139282, |
|
"rewards/margins": 1.4002532958984375, |
|
"rewards/rejected": -2.0706748962402344, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9115764979639326e-07, |
|
"logits/chosen": -2.506743907928467, |
|
"logits/rejected": -2.4725024700164795, |
|
"logps/chosen": -340.2807922363281, |
|
"logps/rejected": -301.91082763671875, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5034831762313843, |
|
"rewards/margins": 1.6047446727752686, |
|
"rewards/rejected": -2.1082279682159424, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8824898196625947e-07, |
|
"logits/chosen": -2.446906805038452, |
|
"logits/rejected": -2.392117977142334, |
|
"logps/chosen": -301.62506103515625, |
|
"logps/rejected": -307.46258544921875, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13553249835968018, |
|
"rewards/margins": 1.2748148441314697, |
|
"rewards/rejected": -1.4103472232818604, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.853403141361256e-07, |
|
"logits/chosen": -2.418687343597412, |
|
"logits/rejected": -2.3500733375549316, |
|
"logps/chosen": -301.64056396484375, |
|
"logps/rejected": -261.1069030761719, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5969820022583008, |
|
"rewards/margins": 0.8396062850952148, |
|
"rewards/rejected": -1.4365884065628052, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.824316463059919e-07, |
|
"logits/chosen": -2.4292044639587402, |
|
"logits/rejected": -2.3925364017486572, |
|
"logps/chosen": -310.83258056640625, |
|
"logps/rejected": -304.67486572265625, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4251963198184967, |
|
"rewards/margins": 1.4573180675506592, |
|
"rewards/rejected": -1.8825145959854126, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7952297847585803e-07, |
|
"logits/chosen": -2.417309284210205, |
|
"logits/rejected": -2.3521695137023926, |
|
"logps/chosen": -278.1476745605469, |
|
"logps/rejected": -281.66656494140625, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.28509074449539185, |
|
"rewards/margins": 1.083017110824585, |
|
"rewards/rejected": -1.3681080341339111, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.766143106457243e-07, |
|
"logits/chosen": -2.3309614658355713, |
|
"logits/rejected": -2.3364787101745605, |
|
"logps/chosen": -259.80975341796875, |
|
"logps/rejected": -250.3785400390625, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.18767808377742767, |
|
"rewards/margins": 1.1901742219924927, |
|
"rewards/rejected": -1.377852201461792, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7370564281559045e-07, |
|
"logits/chosen": -2.5228657722473145, |
|
"logits/rejected": -2.431464672088623, |
|
"logps/chosen": -346.1999816894531, |
|
"logps/rejected": -306.3692932128906, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.02729633077979088, |
|
"rewards/margins": 1.0325729846954346, |
|
"rewards/rejected": -1.0598691701889038, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7079697498545666e-07, |
|
"logits/chosen": -2.458400249481201, |
|
"logits/rejected": -2.4137959480285645, |
|
"logps/chosen": -322.91864013671875, |
|
"logps/rejected": -321.85699462890625, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24051785469055176, |
|
"rewards/margins": 1.2779700756072998, |
|
"rewards/rejected": -1.5184879302978516, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6788830715532287e-07, |
|
"logits/chosen": -2.5085196495056152, |
|
"logits/rejected": -2.441279888153076, |
|
"logps/chosen": -266.5360412597656, |
|
"logps/rejected": -265.48980712890625, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.10649283230304718, |
|
"rewards/margins": 1.1308716535568237, |
|
"rewards/rejected": -1.2373645305633545, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6497963932518907e-07, |
|
"logits/chosen": -2.3860020637512207, |
|
"logits/rejected": -2.3790476322174072, |
|
"logps/chosen": -304.46270751953125, |
|
"logps/rejected": -309.70697021484375, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.43433743715286255, |
|
"rewards/margins": 0.8789669275283813, |
|
"rewards/rejected": -1.3133043050765991, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.490722894668579, |
|
"eval_logits/rejected": -2.427964210510254, |
|
"eval_logps/chosen": -300.2858581542969, |
|
"eval_logps/rejected": -286.9600524902344, |
|
"eval_loss": 0.5983877778053284, |
|
"eval_rewards/accuracies": 0.7599206566810608, |
|
"eval_rewards/chosen": 0.03812364488840103, |
|
"eval_rewards/margins": 1.0935115814208984, |
|
"eval_rewards/rejected": -1.0553878545761108, |
|
"eval_runtime": 614.9522, |
|
"eval_samples_per_second": 3.252, |
|
"eval_steps_per_second": 0.102, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6207097149505523e-07, |
|
"logits/chosen": -2.511011838912964, |
|
"logits/rejected": -2.427821159362793, |
|
"logps/chosen": -329.0254821777344, |
|
"logps/rejected": -263.37030029296875, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.06932397931814194, |
|
"rewards/margins": 1.145455002784729, |
|
"rewards/rejected": -1.0761311054229736, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.591623036649215e-07, |
|
"logits/chosen": -2.5254805088043213, |
|
"logits/rejected": -2.4318673610687256, |
|
"logps/chosen": -304.84466552734375, |
|
"logps/rejected": -272.1488952636719, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.25631338357925415, |
|
"rewards/margins": 1.2234864234924316, |
|
"rewards/rejected": -1.4797999858856201, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5625363583478764e-07, |
|
"logits/chosen": -2.4320321083068848, |
|
"logits/rejected": -2.407522201538086, |
|
"logps/chosen": -285.1934814453125, |
|
"logps/rejected": -312.1936950683594, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12434335052967072, |
|
"rewards/margins": 1.4510531425476074, |
|
"rewards/rejected": -1.5753967761993408, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.533449680046539e-07, |
|
"logits/chosen": -2.480064630508423, |
|
"logits/rejected": -2.439182758331299, |
|
"logps/chosen": -309.5289001464844, |
|
"logps/rejected": -296.3352355957031, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.2124924659729004, |
|
"rewards/margins": 1.4312546253204346, |
|
"rewards/rejected": -1.643747091293335, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5043630017452006e-07, |
|
"logits/chosen": -2.41044020652771, |
|
"logits/rejected": -2.3936314582824707, |
|
"logps/chosen": -284.94268798828125, |
|
"logps/rejected": -283.9359436035156, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.03999950736761093, |
|
"rewards/margins": 1.149482011795044, |
|
"rewards/rejected": -1.1094822883605957, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4752763234438627e-07, |
|
"logits/chosen": -2.430999279022217, |
|
"logits/rejected": -2.3697829246520996, |
|
"logps/chosen": -253.0732421875, |
|
"logps/rejected": -247.62130737304688, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.35475850105285645, |
|
"rewards/margins": 1.3529800176620483, |
|
"rewards/rejected": -0.9982213973999023, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4461896451425247e-07, |
|
"logits/chosen": -2.4376707077026367, |
|
"logits/rejected": -2.4240877628326416, |
|
"logps/chosen": -269.42938232421875, |
|
"logps/rejected": -270.54644775390625, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1738893836736679, |
|
"rewards/margins": 1.2686713933944702, |
|
"rewards/rejected": -1.4425609111785889, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.417102966841187e-07, |
|
"logits/chosen": -2.458761692047119, |
|
"logits/rejected": -2.3944101333618164, |
|
"logps/chosen": -271.7908935546875, |
|
"logps/rejected": -296.3941345214844, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14974147081375122, |
|
"rewards/margins": 1.2848742008209229, |
|
"rewards/rejected": -1.4346158504486084, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3880162885398483e-07, |
|
"logits/chosen": -2.4826645851135254, |
|
"logits/rejected": -2.375396251678467, |
|
"logps/chosen": -299.49560546875, |
|
"logps/rejected": -252.5529327392578, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.49217844009399414, |
|
"rewards/margins": 1.3745396137237549, |
|
"rewards/rejected": -1.8667182922363281, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3589296102385107e-07, |
|
"logits/chosen": -2.499211072921753, |
|
"logits/rejected": -2.417027235031128, |
|
"logps/chosen": -299.8446044921875, |
|
"logps/rejected": -286.61322021484375, |
|
"loss": 0.4399, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06040573865175247, |
|
"rewards/margins": 1.4158607721328735, |
|
"rewards/rejected": -1.4762665033340454, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3298429319371725e-07, |
|
"logits/chosen": -2.5318305492401123, |
|
"logits/rejected": -2.441438674926758, |
|
"logps/chosen": -301.87762451171875, |
|
"logps/rejected": -303.8865051269531, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.34866777062416077, |
|
"rewards/margins": 1.076101541519165, |
|
"rewards/rejected": -1.4247692823410034, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.3007562536358346e-07, |
|
"logits/chosen": -2.478132486343384, |
|
"logits/rejected": -2.420320749282837, |
|
"logps/chosen": -304.8691711425781, |
|
"logps/rejected": -313.31951904296875, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0543731153011322, |
|
"rewards/margins": 1.4534409046173096, |
|
"rewards/rejected": -1.507813811302185, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2716695753344967e-07, |
|
"logits/chosen": -2.452592372894287, |
|
"logits/rejected": -2.422971487045288, |
|
"logps/chosen": -301.95721435546875, |
|
"logps/rejected": -311.69268798828125, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.30631425976753235, |
|
"rewards/margins": 1.4355709552764893, |
|
"rewards/rejected": -1.7418854236602783, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2425828970331587e-07, |
|
"logits/chosen": -2.5267157554626465, |
|
"logits/rejected": -2.4898681640625, |
|
"logps/chosen": -267.6355285644531, |
|
"logps/rejected": -277.17376708984375, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5024998784065247, |
|
"rewards/margins": 1.3192273378372192, |
|
"rewards/rejected": -1.8217273950576782, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2134962187318208e-07, |
|
"logits/chosen": -2.405796527862549, |
|
"logits/rejected": -2.327157497406006, |
|
"logps/chosen": -305.180908203125, |
|
"logps/rejected": -281.836669921875, |
|
"loss": 0.5686, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9660147428512573, |
|
"rewards/margins": 1.525781512260437, |
|
"rewards/rejected": -2.4917960166931152, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1844095404304826e-07, |
|
"logits/chosen": -2.4776437282562256, |
|
"logits/rejected": -2.430321455001831, |
|
"logps/chosen": -330.463623046875, |
|
"logps/rejected": -324.64324951171875, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.0749146938323975, |
|
"rewards/margins": 0.4202847480773926, |
|
"rewards/rejected": -1.4951995611190796, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1553228621291447e-07, |
|
"logits/chosen": -2.4651365280151367, |
|
"logits/rejected": -2.394700527191162, |
|
"logps/chosen": -286.38763427734375, |
|
"logps/rejected": -294.4740295410156, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9769517779350281, |
|
"rewards/margins": 0.8809939622879028, |
|
"rewards/rejected": -1.857945442199707, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1262361838278068e-07, |
|
"logits/chosen": -2.4773755073547363, |
|
"logits/rejected": -2.4409890174865723, |
|
"logps/chosen": -308.72369384765625, |
|
"logps/rejected": -298.4217224121094, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7563016414642334, |
|
"rewards/margins": 1.2209903001785278, |
|
"rewards/rejected": -1.9772918224334717, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0971495055264688e-07, |
|
"logits/chosen": -2.4952621459960938, |
|
"logits/rejected": -2.4689629077911377, |
|
"logps/chosen": -291.19232177734375, |
|
"logps/rejected": -306.8577575683594, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.24586816132068634, |
|
"rewards/margins": 1.1213955879211426, |
|
"rewards/rejected": -1.3672637939453125, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0680628272251307e-07, |
|
"logits/chosen": -2.509241819381714, |
|
"logits/rejected": -2.4504573345184326, |
|
"logps/chosen": -285.4134826660156, |
|
"logps/rejected": -277.93658447265625, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3394593298435211, |
|
"rewards/margins": 1.0499706268310547, |
|
"rewards/rejected": -1.3894299268722534, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0389761489237927e-07, |
|
"logits/chosen": -2.5037548542022705, |
|
"logits/rejected": -2.4983646869659424, |
|
"logps/chosen": -298.67108154296875, |
|
"logps/rejected": -290.48687744140625, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12395872920751572, |
|
"rewards/margins": 1.0312166213989258, |
|
"rewards/rejected": -1.1551753282546997, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0098894706224548e-07, |
|
"logits/chosen": -2.514279842376709, |
|
"logits/rejected": -2.473151683807373, |
|
"logps/chosen": -255.2734375, |
|
"logps/rejected": -252.98855590820312, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.47876453399658203, |
|
"rewards/margins": 1.0464788675308228, |
|
"rewards/rejected": -1.5252434015274048, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.980802792321117e-07, |
|
"logits/chosen": -2.471043825149536, |
|
"logits/rejected": -2.437826156616211, |
|
"logps/chosen": -336.73712158203125, |
|
"logps/rejected": -337.2440490722656, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4187423586845398, |
|
"rewards/margins": 1.326642394065857, |
|
"rewards/rejected": -1.7453848123550415, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9517161140197787e-07, |
|
"logits/chosen": -2.5959396362304688, |
|
"logits/rejected": -2.5564401149749756, |
|
"logps/chosen": -309.3214111328125, |
|
"logps/rejected": -300.98138427734375, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3669760823249817, |
|
"rewards/margins": 1.1876031160354614, |
|
"rewards/rejected": -1.5545791387557983, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9226294357184408e-07, |
|
"logits/chosen": -2.498528480529785, |
|
"logits/rejected": -2.4267897605895996, |
|
"logps/chosen": -316.4427185058594, |
|
"logps/rejected": -322.12493896484375, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0588008277118206, |
|
"rewards/margins": 1.484725832939148, |
|
"rewards/rejected": -1.5435266494750977, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8935427574171028e-07, |
|
"logits/chosen": -2.542093515396118, |
|
"logits/rejected": -2.4682421684265137, |
|
"logps/chosen": -337.3454284667969, |
|
"logps/rejected": -299.1844482421875, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.02595728635787964, |
|
"rewards/margins": 1.281512975692749, |
|
"rewards/rejected": -1.307470440864563, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.864456079115765e-07, |
|
"logits/chosen": -2.477086067199707, |
|
"logits/rejected": -2.457650661468506, |
|
"logps/chosen": -268.6798095703125, |
|
"logps/rejected": -257.23358154296875, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1567353755235672, |
|
"rewards/margins": 1.2585152387619019, |
|
"rewards/rejected": -1.415250539779663, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.835369400814427e-07, |
|
"logits/chosen": -2.544891595840454, |
|
"logits/rejected": -2.4903788566589355, |
|
"logps/chosen": -289.84234619140625, |
|
"logps/rejected": -263.73297119140625, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.07888557016849518, |
|
"rewards/margins": 1.23806893825531, |
|
"rewards/rejected": -1.3169543743133545, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8062827225130888e-07, |
|
"logits/chosen": -2.4458956718444824, |
|
"logits/rejected": -2.36130428314209, |
|
"logps/chosen": -296.7621765136719, |
|
"logps/rejected": -268.7115173339844, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.21038198471069336, |
|
"rewards/margins": 0.9879549741744995, |
|
"rewards/rejected": -1.1983369588851929, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.777196044211751e-07, |
|
"logits/chosen": -2.422639846801758, |
|
"logits/rejected": -2.3881583213806152, |
|
"logps/chosen": -267.2961730957031, |
|
"logps/rejected": -258.95159912109375, |
|
"loss": 0.5247, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0472625195980072, |
|
"rewards/margins": 1.1689708232879639, |
|
"rewards/rejected": -1.216233491897583, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.748109365910413e-07, |
|
"logits/chosen": -2.4238734245300293, |
|
"logits/rejected": -2.3859925270080566, |
|
"logps/chosen": -241.4824981689453, |
|
"logps/rejected": -256.94635009765625, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.039008866995573044, |
|
"rewards/margins": 1.5137543678283691, |
|
"rewards/rejected": -1.474745512008667, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.719022687609075e-07, |
|
"logits/chosen": -2.5132765769958496, |
|
"logits/rejected": -2.449302911758423, |
|
"logps/chosen": -363.523681640625, |
|
"logps/rejected": -352.0708923339844, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5855492353439331, |
|
"rewards/margins": 1.2664353847503662, |
|
"rewards/rejected": -1.8519846200942993, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6899360093077368e-07, |
|
"logits/chosen": -2.4553329944610596, |
|
"logits/rejected": -2.4112277030944824, |
|
"logps/chosen": -280.7616882324219, |
|
"logps/rejected": -236.74765014648438, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.48466506600379944, |
|
"rewards/margins": 1.0938060283660889, |
|
"rewards/rejected": -1.578471064567566, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.660849331006399e-07, |
|
"logits/chosen": -2.47243070602417, |
|
"logits/rejected": -2.448956251144409, |
|
"logps/chosen": -347.1180114746094, |
|
"logps/rejected": -303.9145202636719, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2242584526538849, |
|
"rewards/margins": 1.3924510478973389, |
|
"rewards/rejected": -1.6167097091674805, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.631762652705061e-07, |
|
"logits/chosen": -2.430530071258545, |
|
"logits/rejected": -2.4229390621185303, |
|
"logps/chosen": -245.01284790039062, |
|
"logps/rejected": -268.6424560546875, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5738204121589661, |
|
"rewards/margins": 1.0651990175247192, |
|
"rewards/rejected": -1.6390196084976196, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.602675974403723e-07, |
|
"logits/chosen": -2.509038209915161, |
|
"logits/rejected": -2.489039897918701, |
|
"logps/chosen": -279.32379150390625, |
|
"logps/rejected": -294.4464416503906, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5361760854721069, |
|
"rewards/margins": 1.5468828678131104, |
|
"rewards/rejected": -2.0830588340759277, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.573589296102385e-07, |
|
"logits/chosen": -2.47121262550354, |
|
"logits/rejected": -2.3763961791992188, |
|
"logps/chosen": -295.9562072753906, |
|
"logps/rejected": -312.29022216796875, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7061088681221008, |
|
"rewards/margins": 0.6994223594665527, |
|
"rewards/rejected": -1.4055311679840088, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.544502617801047e-07, |
|
"logits/chosen": -2.4963533878326416, |
|
"logits/rejected": -2.4046132564544678, |
|
"logps/chosen": -306.95379638671875, |
|
"logps/rejected": -269.7267150878906, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.14952921867370605, |
|
"rewards/margins": 1.085407018661499, |
|
"rewards/rejected": -1.2349363565444946, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.515415939499709e-07, |
|
"logits/chosen": -2.3877298831939697, |
|
"logits/rejected": -2.3528621196746826, |
|
"logps/chosen": -306.18292236328125, |
|
"logps/rejected": -281.7127990722656, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3554263710975647, |
|
"rewards/margins": 1.3783906698226929, |
|
"rewards/rejected": -1.7338171005249023, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.486329261198371e-07, |
|
"logits/chosen": -2.4248931407928467, |
|
"logits/rejected": -2.3428282737731934, |
|
"logps/chosen": -265.8216247558594, |
|
"logps/rejected": -254.1871795654297, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4705156683921814, |
|
"rewards/margins": 1.1333085298538208, |
|
"rewards/rejected": -1.6038239002227783, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4572425828970332e-07, |
|
"logits/chosen": -2.5125279426574707, |
|
"logits/rejected": -2.4069325923919678, |
|
"logps/chosen": -317.7018127441406, |
|
"logps/rejected": -303.3476867675781, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4776553511619568, |
|
"rewards/margins": 1.0890066623687744, |
|
"rewards/rejected": -1.566662073135376, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.428155904595695e-07, |
|
"logits/chosen": -2.402710437774658, |
|
"logits/rejected": -2.35219407081604, |
|
"logps/chosen": -308.2895202636719, |
|
"logps/rejected": -298.3793640136719, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.43602848052978516, |
|
"rewards/margins": 1.1804325580596924, |
|
"rewards/rejected": -1.6164608001708984, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.399069226294357e-07, |
|
"logits/chosen": -2.325125217437744, |
|
"logits/rejected": -2.265918254852295, |
|
"logps/chosen": -255.18252563476562, |
|
"logps/rejected": -242.65451049804688, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.38456660509109497, |
|
"rewards/margins": 1.3007227182388306, |
|
"rewards/rejected": -1.6852893829345703, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3699825479930191e-07, |
|
"logits/chosen": -2.444197177886963, |
|
"logits/rejected": -2.3448662757873535, |
|
"logps/chosen": -335.58575439453125, |
|
"logps/rejected": -257.6363525390625, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1254938691854477, |
|
"rewards/margins": 1.4525253772735596, |
|
"rewards/rejected": -1.578019380569458, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3408958696916812e-07, |
|
"logits/chosen": -2.432386875152588, |
|
"logits/rejected": -2.3468270301818848, |
|
"logps/chosen": -264.3630065917969, |
|
"logps/rejected": -253.1586456298828, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.010800900869071484, |
|
"rewards/margins": 1.3583967685699463, |
|
"rewards/rejected": -1.3691976070404053, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.311809191390343e-07, |
|
"logits/chosen": -2.3925037384033203, |
|
"logits/rejected": -2.3687844276428223, |
|
"logps/chosen": -298.83160400390625, |
|
"logps/rejected": -321.2250671386719, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13408958911895752, |
|
"rewards/margins": 1.2063826322555542, |
|
"rewards/rejected": -1.3404719829559326, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.282722513089005e-07, |
|
"logits/chosen": -2.5323238372802734, |
|
"logits/rejected": -2.450490713119507, |
|
"logps/chosen": -303.0655517578125, |
|
"logps/rejected": -273.21636962890625, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07529834657907486, |
|
"rewards/margins": 1.215362787246704, |
|
"rewards/rejected": -1.290661096572876, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2536358347876672e-07, |
|
"logits/chosen": -2.4639523029327393, |
|
"logits/rejected": -2.451842784881592, |
|
"logps/chosen": -273.89410400390625, |
|
"logps/rejected": -288.24395751953125, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1511283814907074, |
|
"rewards/margins": 1.046019196510315, |
|
"rewards/rejected": -1.1971476078033447, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2245491564863293e-07, |
|
"logits/chosen": -2.5214879512786865, |
|
"logits/rejected": -2.3725059032440186, |
|
"logps/chosen": -365.9342041015625, |
|
"logps/rejected": -288.50860595703125, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.24411487579345703, |
|
"rewards/margins": 1.6229133605957031, |
|
"rewards/rejected": -1.3787983655929565, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.195462478184991e-07, |
|
"logits/chosen": -2.4407520294189453, |
|
"logits/rejected": -2.403568744659424, |
|
"logps/chosen": -266.7708740234375, |
|
"logps/rejected": -256.9794006347656, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.008794727735221386, |
|
"rewards/margins": 1.0573989152908325, |
|
"rewards/rejected": -1.0486040115356445, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.479328155517578, |
|
"eval_logits/rejected": -2.420834541320801, |
|
"eval_logps/chosen": -299.6731872558594, |
|
"eval_logps/rejected": -287.96630859375, |
|
"eval_loss": 0.5819299817085266, |
|
"eval_rewards/accuracies": 0.7559523582458496, |
|
"eval_rewards/chosen": 0.09938977658748627, |
|
"eval_rewards/margins": 1.2554062604904175, |
|
"eval_rewards/rejected": -1.1560163497924805, |
|
"eval_runtime": 614.7519, |
|
"eval_samples_per_second": 3.253, |
|
"eval_steps_per_second": 0.102, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1663757998836531e-07, |
|
"logits/chosen": -2.4385485649108887, |
|
"logits/rejected": -2.414762496948242, |
|
"logps/chosen": -282.46331787109375, |
|
"logps/rejected": -271.0342712402344, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.030192693695425987, |
|
"rewards/margins": 1.1163215637207031, |
|
"rewards/rejected": -1.0861289501190186, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1372891215823152e-07, |
|
"logits/chosen": -2.475036144256592, |
|
"logits/rejected": -2.431018352508545, |
|
"logps/chosen": -273.4059753417969, |
|
"logps/rejected": -279.97149658203125, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.07051552832126617, |
|
"rewards/margins": 1.225347638130188, |
|
"rewards/rejected": -1.295863151550293, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1082024432809772e-07, |
|
"logits/chosen": -2.434101104736328, |
|
"logits/rejected": -2.3855373859405518, |
|
"logps/chosen": -270.7154235839844, |
|
"logps/rejected": -257.188720703125, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.03814787417650223, |
|
"rewards/margins": 1.2096832990646362, |
|
"rewards/rejected": -1.2478312253952026, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0791157649796392e-07, |
|
"logits/chosen": -2.509958267211914, |
|
"logits/rejected": -2.5057830810546875, |
|
"logps/chosen": -268.060791015625, |
|
"logps/rejected": -294.9040832519531, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09406580030918121, |
|
"rewards/margins": 0.9256828427314758, |
|
"rewards/rejected": -1.019748568534851, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0500290866783013e-07, |
|
"logits/chosen": -2.391065835952759, |
|
"logits/rejected": -2.3701300621032715, |
|
"logps/chosen": -280.59686279296875, |
|
"logps/rejected": -243.96798706054688, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04815078154206276, |
|
"rewards/margins": 0.9994159936904907, |
|
"rewards/rejected": -0.9512651562690735, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0209424083769633e-07, |
|
"logits/chosen": -2.428290367126465, |
|
"logits/rejected": -2.394292116165161, |
|
"logps/chosen": -284.85968017578125, |
|
"logps/rejected": -287.145263671875, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.22867484390735626, |
|
"rewards/margins": 1.033279299736023, |
|
"rewards/rejected": -1.2619540691375732, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.918557300756253e-08, |
|
"logits/chosen": -2.4378373622894287, |
|
"logits/rejected": -2.369236946105957, |
|
"logps/chosen": -293.5215759277344, |
|
"logps/rejected": -246.56997680664062, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13024893403053284, |
|
"rewards/margins": 1.4804332256317139, |
|
"rewards/rejected": -1.6106821298599243, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.627690517742873e-08, |
|
"logits/chosen": -2.4206690788269043, |
|
"logits/rejected": -2.409813165664673, |
|
"logps/chosen": -333.97467041015625, |
|
"logps/rejected": -314.40960693359375, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2985494136810303, |
|
"rewards/margins": 1.2202610969543457, |
|
"rewards/rejected": -0.9217117428779602, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.336823734729494e-08, |
|
"logits/chosen": -2.426513671875, |
|
"logits/rejected": -2.4083962440490723, |
|
"logps/chosen": -312.02655029296875, |
|
"logps/rejected": -314.4944152832031, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1636694371700287, |
|
"rewards/margins": 1.20877206325531, |
|
"rewards/rejected": -1.372441291809082, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.045956951716113e-08, |
|
"logits/chosen": -2.4410054683685303, |
|
"logits/rejected": -2.421079158782959, |
|
"logps/chosen": -268.4046630859375, |
|
"logps/rejected": -259.6822509765625, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.018908673897385597, |
|
"rewards/margins": 1.4655755758285522, |
|
"rewards/rejected": -1.446666955947876, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.755090168702734e-08, |
|
"logits/chosen": -2.4191842079162598, |
|
"logits/rejected": -2.3796703815460205, |
|
"logps/chosen": -282.9931945800781, |
|
"logps/rejected": -297.3874206542969, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.26676809787750244, |
|
"rewards/margins": 1.4654661417007446, |
|
"rewards/rejected": -1.732234239578247, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.464223385689353e-08, |
|
"logits/chosen": -2.474801540374756, |
|
"logits/rejected": -2.439415693283081, |
|
"logps/chosen": -316.919921875, |
|
"logps/rejected": -286.6101989746094, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14554055035114288, |
|
"rewards/margins": 1.1523851156234741, |
|
"rewards/rejected": -1.2979257106781006, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.173356602675974e-08, |
|
"logits/chosen": -2.501138210296631, |
|
"logits/rejected": -2.423609495162964, |
|
"logps/chosen": -267.6105651855469, |
|
"logps/rejected": -268.2532653808594, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7160999178886414, |
|
"rewards/margins": 0.7922013998031616, |
|
"rewards/rejected": -1.5083013772964478, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.882489819662593e-08, |
|
"logits/chosen": -2.4721310138702393, |
|
"logits/rejected": -2.4814186096191406, |
|
"logps/chosen": -304.85931396484375, |
|
"logps/rejected": -421.44427490234375, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4838688373565674, |
|
"rewards/margins": 1.3697353601455688, |
|
"rewards/rejected": -1.8536040782928467, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.591623036649214e-08, |
|
"logits/chosen": -2.331878185272217, |
|
"logits/rejected": -2.3361971378326416, |
|
"logps/chosen": -296.43096923828125, |
|
"logps/rejected": -293.191650390625, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4644225537776947, |
|
"rewards/margins": 1.1996773481369019, |
|
"rewards/rejected": -1.664099931716919, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.300756253635834e-08, |
|
"logits/chosen": -2.472486972808838, |
|
"logits/rejected": -2.420685291290283, |
|
"logps/chosen": -268.98712158203125, |
|
"logps/rejected": -287.7394714355469, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22886808216571808, |
|
"rewards/margins": 1.0310173034667969, |
|
"rewards/rejected": -1.259885311126709, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.009889470622454e-08, |
|
"logits/chosen": -2.480839967727661, |
|
"logits/rejected": -2.405022144317627, |
|
"logps/chosen": -327.4227600097656, |
|
"logps/rejected": -299.2853088378906, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.21133089065551758, |
|
"rewards/margins": 1.356836199760437, |
|
"rewards/rejected": -1.568166971206665, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.719022687609075e-08, |
|
"logits/chosen": -2.4610321521759033, |
|
"logits/rejected": -2.3211312294006348, |
|
"logps/chosen": -367.6954650878906, |
|
"logps/rejected": -312.0871887207031, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6737295389175415, |
|
"rewards/margins": 1.504465103149414, |
|
"rewards/rejected": -2.178194522857666, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.428155904595695e-08, |
|
"logits/chosen": -2.350032091140747, |
|
"logits/rejected": -2.3311705589294434, |
|
"logps/chosen": -309.73712158203125, |
|
"logps/rejected": -270.7311096191406, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18244963884353638, |
|
"rewards/margins": 1.2423228025436401, |
|
"rewards/rejected": -1.4247725009918213, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.137289121582315e-08, |
|
"logits/chosen": -2.5109975337982178, |
|
"logits/rejected": -2.3817737102508545, |
|
"logps/chosen": -335.9524841308594, |
|
"logps/rejected": -272.06829833984375, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4245755672454834, |
|
"rewards/margins": 1.40361487865448, |
|
"rewards/rejected": -1.828190565109253, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.846422338568935e-08, |
|
"logits/chosen": -2.4695639610290527, |
|
"logits/rejected": -2.414325714111328, |
|
"logps/chosen": -304.23468017578125, |
|
"logps/rejected": -285.8380432128906, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3581593334674835, |
|
"rewards/margins": 1.1380908489227295, |
|
"rewards/rejected": -1.4962502717971802, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.555555555555555e-08, |
|
"logits/chosen": -2.439877986907959, |
|
"logits/rejected": -2.3664920330047607, |
|
"logps/chosen": -350.97137451171875, |
|
"logps/rejected": -327.41302490234375, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5186697244644165, |
|
"rewards/margins": 1.1480152606964111, |
|
"rewards/rejected": -1.6666851043701172, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.264688772542175e-08, |
|
"logits/chosen": -2.455570697784424, |
|
"logits/rejected": -2.409554958343506, |
|
"logps/chosen": -292.89715576171875, |
|
"logps/rejected": -287.56573486328125, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.08683060109615326, |
|
"rewards/margins": 0.9011920690536499, |
|
"rewards/rejected": -0.98802250623703, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.973821989528795e-08, |
|
"logits/chosen": -2.502526044845581, |
|
"logits/rejected": -2.46052885055542, |
|
"logps/chosen": -266.27947998046875, |
|
"logps/rejected": -262.73822021484375, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.008827775716781616, |
|
"rewards/margins": 1.2833327054977417, |
|
"rewards/rejected": -1.2921605110168457, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.682955206515416e-08, |
|
"logits/chosen": -2.494965076446533, |
|
"logits/rejected": -2.4605846405029297, |
|
"logps/chosen": -316.22882080078125, |
|
"logps/rejected": -368.8150939941406, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.25845271348953247, |
|
"rewards/margins": 1.4049198627471924, |
|
"rewards/rejected": -1.6633726358413696, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.392088423502036e-08, |
|
"logits/chosen": -2.481264352798462, |
|
"logits/rejected": -2.5107004642486572, |
|
"logps/chosen": -274.2122802734375, |
|
"logps/rejected": -280.37933349609375, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5907906293869019, |
|
"rewards/margins": 1.1111408472061157, |
|
"rewards/rejected": -1.701931357383728, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.101221640488656e-08, |
|
"logits/chosen": -2.4728500843048096, |
|
"logits/rejected": -2.408026695251465, |
|
"logps/chosen": -308.61956787109375, |
|
"logps/rejected": -288.31170654296875, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.6883036494255066, |
|
"rewards/margins": 1.1805965900421143, |
|
"rewards/rejected": -1.8689002990722656, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.810354857475276e-08, |
|
"logits/chosen": -2.3637709617614746, |
|
"logits/rejected": -2.3329529762268066, |
|
"logps/chosen": -324.7041320800781, |
|
"logps/rejected": -270.0013122558594, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.544477105140686, |
|
"rewards/margins": 1.064997673034668, |
|
"rewards/rejected": -1.6094748973846436, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.519488074461896e-08, |
|
"logits/chosen": -2.447537660598755, |
|
"logits/rejected": -2.386204242706299, |
|
"logps/chosen": -268.69378662109375, |
|
"logps/rejected": -276.32843017578125, |
|
"loss": 0.3955, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3514997661113739, |
|
"rewards/margins": 1.253027319908142, |
|
"rewards/rejected": -1.6045271158218384, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.228621291448516e-08, |
|
"logits/chosen": -2.504725694656372, |
|
"logits/rejected": -2.4065499305725098, |
|
"logps/chosen": -324.8893127441406, |
|
"logps/rejected": -262.2546081542969, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5291447043418884, |
|
"rewards/margins": 1.2341575622558594, |
|
"rewards/rejected": -1.763302206993103, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9377545084351366e-08, |
|
"logits/chosen": -2.4359753131866455, |
|
"logits/rejected": -2.3973264694213867, |
|
"logps/chosen": -380.8512878417969, |
|
"logps/rejected": -345.4535827636719, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.39544662833213806, |
|
"rewards/margins": 1.4492541551589966, |
|
"rewards/rejected": -1.844700813293457, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6468877254217567e-08, |
|
"logits/chosen": -2.455008029937744, |
|
"logits/rejected": -2.406989574432373, |
|
"logps/chosen": -321.1400451660156, |
|
"logps/rejected": -326.90203857421875, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.45269957184791565, |
|
"rewards/margins": 1.45340895652771, |
|
"rewards/rejected": -1.9061084985733032, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3560209424083768e-08, |
|
"logits/chosen": -2.451988458633423, |
|
"logits/rejected": -2.390340805053711, |
|
"logps/chosen": -319.3875427246094, |
|
"logps/rejected": -321.97186279296875, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6661805510520935, |
|
"rewards/margins": 0.7991847395896912, |
|
"rewards/rejected": -1.4653651714324951, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.065154159394997e-08, |
|
"logits/chosen": -2.439758539199829, |
|
"logits/rejected": -2.419706106185913, |
|
"logps/chosen": -330.24810791015625, |
|
"logps/rejected": -308.7535095214844, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5779649019241333, |
|
"rewards/margins": 0.9780583381652832, |
|
"rewards/rejected": -1.5560232400894165, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7742873763816174e-08, |
|
"logits/chosen": -2.4510066509246826, |
|
"logits/rejected": -2.406909704208374, |
|
"logps/chosen": -338.01861572265625, |
|
"logps/rejected": -291.27923583984375, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.372661292552948, |
|
"rewards/margins": 1.7962923049926758, |
|
"rewards/rejected": -2.1689534187316895, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4834205933682373e-08, |
|
"logits/chosen": -2.5106232166290283, |
|
"logits/rejected": -2.4458587169647217, |
|
"logps/chosen": -300.896728515625, |
|
"logps/rejected": -278.838623046875, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4824046492576599, |
|
"rewards/margins": 1.491176962852478, |
|
"rewards/rejected": -1.9735815525054932, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1925538103548575e-08, |
|
"logits/chosen": -2.4239859580993652, |
|
"logits/rejected": -2.373213768005371, |
|
"logps/chosen": -280.8854675292969, |
|
"logps/rejected": -301.01092529296875, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.43918657302856445, |
|
"rewards/margins": 1.376401662826538, |
|
"rewards/rejected": -1.8155882358551025, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.016870273414776e-09, |
|
"logits/chosen": -2.4339070320129395, |
|
"logits/rejected": -2.3967947959899902, |
|
"logps/chosen": -354.99462890625, |
|
"logps/rejected": -309.2855529785156, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24851647019386292, |
|
"rewards/margins": 1.5067845582962036, |
|
"rewards/rejected": -1.7553011178970337, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.1082024432809765e-09, |
|
"logits/chosen": -2.4029338359832764, |
|
"logits/rejected": -2.4133036136627197, |
|
"logps/chosen": -268.3423767089844, |
|
"logps/rejected": -301.10321044921875, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8797990083694458, |
|
"rewards/margins": 0.8209834098815918, |
|
"rewards/rejected": -1.7007821798324585, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.1995346131471783e-09, |
|
"logits/chosen": -2.4465489387512207, |
|
"logits/rejected": -2.4295449256896973, |
|
"logps/chosen": -268.50006103515625, |
|
"logps/rejected": -266.83685302734375, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.589029848575592, |
|
"rewards/margins": 1.1926295757293701, |
|
"rewards/rejected": -1.781659483909607, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.9086678301337986e-10, |
|
"logits/chosen": -2.482949733734131, |
|
"logits/rejected": -2.426959991455078, |
|
"logps/chosen": -303.0064392089844, |
|
"logps/rejected": -287.6535339355469, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.44241657853126526, |
|
"rewards/margins": 0.9881950616836548, |
|
"rewards/rejected": -1.4306116104125977, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1911, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5469513680846325, |
|
"train_runtime": 31987.9403, |
|
"train_samples_per_second": 1.911, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1911, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|