|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999508116084604, |
|
"eval_steps": 200, |
|
"global_step": 1016, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.901960784313726e-08, |
|
"logits/chosen": -2.0737838745117188, |
|
"logits/rejected": -2.1456010341644287, |
|
"logps/chosen": -95.6572265625, |
|
"logps/rejected": -106.55765533447266, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -2.165830373764038, |
|
"logits/rejected": -2.060776948928833, |
|
"logps/chosen": -121.03773498535156, |
|
"logps/rejected": -87.5294189453125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00047249632189050317, |
|
"rewards/margins": 0.002704059472307563, |
|
"rewards/rejected": -0.002231562975794077, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.80392156862745e-07, |
|
"logits/chosen": -2.1349050998687744, |
|
"logits/rejected": -2.016066312789917, |
|
"logps/chosen": -130.94175720214844, |
|
"logps/rejected": -105.7674789428711, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0016262540593743324, |
|
"rewards/margins": -0.0006325626163743436, |
|
"rewards/rejected": -0.0009936915012076497, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"logits/chosen": -2.2818872928619385, |
|
"logits/rejected": -2.1805636882781982, |
|
"logps/chosen": -121.03263854980469, |
|
"logps/rejected": -104.84712982177734, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.003329185303300619, |
|
"rewards/margins": -0.002142944373190403, |
|
"rewards/rejected": -0.0011862408136948943, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.96078431372549e-06, |
|
"logits/chosen": -2.2757694721221924, |
|
"logits/rejected": -2.156691074371338, |
|
"logps/chosen": -126.5389633178711, |
|
"logps/rejected": -105.20024108886719, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0010745985200628638, |
|
"rewards/margins": 0.002502765040844679, |
|
"rewards/rejected": -0.0014281660551205277, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.450980392156863e-06, |
|
"logits/chosen": -2.3002381324768066, |
|
"logits/rejected": -2.206784725189209, |
|
"logps/chosen": -124.18415832519531, |
|
"logps/rejected": -98.63652801513672, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0012203993974253535, |
|
"rewards/margins": 0.003048995044082403, |
|
"rewards/rejected": -0.0018285956466570497, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"logits/chosen": -2.3112730979919434, |
|
"logits/rejected": -2.232532501220703, |
|
"logps/chosen": -126.93055725097656, |
|
"logps/rejected": -109.7610092163086, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.01018393412232399, |
|
"rewards/margins": 0.0034624538384377956, |
|
"rewards/rejected": 0.006721480283886194, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.431372549019608e-06, |
|
"logits/chosen": -2.2930376529693604, |
|
"logits/rejected": -2.187328815460205, |
|
"logps/chosen": -119.68111419677734, |
|
"logps/rejected": -93.93470001220703, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.007645626552402973, |
|
"rewards/margins": 0.003846182022243738, |
|
"rewards/rejected": 0.0037994447629898787, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.92156862745098e-06, |
|
"logits/chosen": -2.144854784011841, |
|
"logits/rejected": -2.016728401184082, |
|
"logps/chosen": -132.2415313720703, |
|
"logps/rejected": -106.7207260131836, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.015773242339491844, |
|
"rewards/margins": 0.006012483034282923, |
|
"rewards/rejected": 0.009760759770870209, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.411764705882353e-06, |
|
"logits/chosen": -2.095534324645996, |
|
"logits/rejected": -1.9636192321777344, |
|
"logps/chosen": -106.81976318359375, |
|
"logps/rejected": -83.68408966064453, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02147563174366951, |
|
"rewards/margins": 0.009103062562644482, |
|
"rewards/rejected": 0.012372570112347603, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.901960784313726e-06, |
|
"logits/chosen": -2.0606656074523926, |
|
"logits/rejected": -1.87876296043396, |
|
"logps/chosen": -141.498779296875, |
|
"logps/rejected": -105.37713623046875, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04738330841064453, |
|
"rewards/margins": 0.024673232808709145, |
|
"rewards/rejected": 0.022710075601935387, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9990549169459415e-06, |
|
"logits/chosen": -2.2754921913146973, |
|
"logits/rejected": -2.135282516479492, |
|
"logps/chosen": -124.19816589355469, |
|
"logps/rejected": -98.95077514648438, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.056061066687107086, |
|
"rewards/margins": 0.03164363652467728, |
|
"rewards/rejected": 0.02441743016242981, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995216741642263e-06, |
|
"logits/chosen": -2.280646324157715, |
|
"logits/rejected": -2.206347942352295, |
|
"logps/chosen": -115.8212890625, |
|
"logps/rejected": -96.82814025878906, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07867949455976486, |
|
"rewards/margins": 0.02664627507328987, |
|
"rewards/rejected": 0.05203322693705559, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988430936991089e-06, |
|
"logits/chosen": -2.2835030555725098, |
|
"logits/rejected": -2.140094041824341, |
|
"logps/chosen": -127.94944763183594, |
|
"logps/rejected": -101.47581481933594, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.10174749791622162, |
|
"rewards/margins": 0.06262228637933731, |
|
"rewards/rejected": 0.039125215262174606, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.978705519144525e-06, |
|
"logits/chosen": -2.18971586227417, |
|
"logits/rejected": -2.0279135704040527, |
|
"logps/chosen": -140.58509826660156, |
|
"logps/rejected": -104.29924011230469, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.07688155025243759, |
|
"rewards/margins": 0.061921559274196625, |
|
"rewards/rejected": 0.014959996566176414, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966051976854862e-06, |
|
"logits/chosen": -2.333808183670044, |
|
"logits/rejected": -2.2144458293914795, |
|
"logps/chosen": -111.2592544555664, |
|
"logps/rejected": -89.93299865722656, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.11892716586589813, |
|
"rewards/margins": 0.08827908337116241, |
|
"rewards/rejected": 0.030648082494735718, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950485257902782e-06, |
|
"logits/chosen": -2.209681749343872, |
|
"logits/rejected": -2.1354687213897705, |
|
"logps/chosen": -122.34986877441406, |
|
"logps/rejected": -97.61170959472656, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.09915992617607117, |
|
"rewards/margins": 0.06800667941570282, |
|
"rewards/rejected": 0.031153246760368347, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.932023751439358e-06, |
|
"logits/chosen": -2.276695489883423, |
|
"logits/rejected": -2.118220329284668, |
|
"logps/chosen": -131.77114868164062, |
|
"logps/rejected": -103.4157943725586, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.10167907178401947, |
|
"rewards/margins": 0.06214705854654312, |
|
"rewards/rejected": 0.039532024413347244, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9106892662627395e-06, |
|
"logits/chosen": -2.347627878189087, |
|
"logits/rejected": -2.223806858062744, |
|
"logps/chosen": -125.82316589355469, |
|
"logps/rejected": -102.32342529296875, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.13379618525505066, |
|
"rewards/margins": 0.07378261536359787, |
|
"rewards/rejected": 0.06001356244087219, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.886507005055149e-06, |
|
"logits/chosen": -2.299999713897705, |
|
"logits/rejected": -2.124567985534668, |
|
"logps/chosen": -135.9125518798828, |
|
"logps/rejected": -102.34548950195312, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.12664374709129333, |
|
"rewards/margins": 0.10224989801645279, |
|
"rewards/rejected": 0.024393849074840546, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.859505534610658e-06, |
|
"logits/chosen": -2.2595176696777344, |
|
"logits/rejected": -2.167226552963257, |
|
"logps/chosen": -115.63578033447266, |
|
"logps/rejected": -97.40113830566406, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07887722551822662, |
|
"rewards/margins": 0.09930779039859772, |
|
"rewards/rejected": -0.020430563017725945, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -2.1231298446655273, |
|
"eval_logits/rejected": -2.0235936641693115, |
|
"eval_logps/chosen": -120.2629165649414, |
|
"eval_logps/rejected": -98.29754638671875, |
|
"eval_loss": 0.6594940423965454, |
|
"eval_rewards/accuracies": 0.6415094137191772, |
|
"eval_rewards/chosen": 0.04982735216617584, |
|
"eval_rewards/margins": 0.08182442933320999, |
|
"eval_rewards/rejected": -0.031997084617614746, |
|
"eval_runtime": 417.8564, |
|
"eval_samples_per_second": 1.0, |
|
"eval_steps_per_second": 0.127, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.829716752088893e-06, |
|
"logits/chosen": -2.202777147293091, |
|
"logits/rejected": -2.1421408653259277, |
|
"logps/chosen": -107.44587707519531, |
|
"logps/rejected": -101.29158020019531, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.03508143126964569, |
|
"rewards/margins": 0.07955195009708405, |
|
"rewards/rejected": -0.04447052255272865, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.797175847334535e-06, |
|
"logits/chosen": -2.217074155807495, |
|
"logits/rejected": -2.0960960388183594, |
|
"logps/chosen": -130.84152221679688, |
|
"logps/rejected": -107.61418151855469, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0030643828213214874, |
|
"rewards/margins": 0.05633828788995743, |
|
"rewards/rejected": -0.05327390506863594, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.761921261307143e-06, |
|
"logits/chosen": -2.2311947345733643, |
|
"logits/rejected": -2.0887582302093506, |
|
"logps/chosen": -123.64229583740234, |
|
"logps/rejected": -103.31034851074219, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.009160916320979595, |
|
"rewards/margins": 0.09632667899131775, |
|
"rewards/rejected": -0.10548758506774902, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.723994640670377e-06, |
|
"logits/chosen": -2.2253684997558594, |
|
"logits/rejected": -2.0454909801483154, |
|
"logps/chosen": -137.86196899414062, |
|
"logps/rejected": -104.11041259765625, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.011105505749583244, |
|
"rewards/margins": 0.1803070306777954, |
|
"rewards/rejected": -0.1914125233888626, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68344078859431e-06, |
|
"logits/chosen": -2.1830849647521973, |
|
"logits/rejected": -2.1306838989257812, |
|
"logps/chosen": -119.04063415527344, |
|
"logps/rejected": -113.45658111572266, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10984460264444351, |
|
"rewards/margins": 0.1269720494747162, |
|
"rewards/rejected": -0.2368166148662567, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6403076118289006e-06, |
|
"logits/chosen": -2.151690721511841, |
|
"logits/rejected": -2.0012199878692627, |
|
"logps/chosen": -135.11477661132812, |
|
"logps/rejected": -110.16157531738281, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.162130668759346, |
|
"rewards/margins": 0.17287474870681763, |
|
"rewards/rejected": -0.33500543236732483, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5946460641111776e-06, |
|
"logits/chosen": -2.3390426635742188, |
|
"logits/rejected": -2.1155288219451904, |
|
"logps/chosen": -132.8888397216797, |
|
"logps/rejected": -110.8713607788086, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.19292452931404114, |
|
"rewards/margins": 0.24748054146766663, |
|
"rewards/rejected": -0.44040507078170776, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.546510085972983e-06, |
|
"logits/chosen": -2.3495311737060547, |
|
"logits/rejected": -2.2263104915618896, |
|
"logps/chosen": -143.91481018066406, |
|
"logps/rejected": -119.75496673583984, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.20113949477672577, |
|
"rewards/margins": 0.19329218566417694, |
|
"rewards/rejected": -0.3944316804409027, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.495956541020376e-06, |
|
"logits/chosen": -2.3487744331359863, |
|
"logits/rejected": -2.1971921920776367, |
|
"logps/chosen": -150.26535034179688, |
|
"logps/rejected": -133.6148681640625, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.36308565735816956, |
|
"rewards/margins": 0.2375185787677765, |
|
"rewards/rejected": -0.6006041765213013, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.443045148759978e-06, |
|
"logits/chosen": -2.3486955165863037, |
|
"logits/rejected": -2.1962506771087646, |
|
"logps/chosen": -159.32064819335938, |
|
"logps/rejected": -126.43644714355469, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.44245219230651855, |
|
"rewards/margins": 0.2853606343269348, |
|
"rewards/rejected": -0.7278127670288086, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3878384140516025e-06, |
|
"logits/chosen": -2.3736767768859863, |
|
"logits/rejected": -2.249598741531372, |
|
"logps/chosen": -146.68006896972656, |
|
"logps/rejected": -135.25997924804688, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.41068369150161743, |
|
"rewards/margins": 0.26723065972328186, |
|
"rewards/rejected": -0.6779143214225769, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330401553270522e-06, |
|
"logits/chosen": -2.298905372619629, |
|
"logits/rejected": -2.1949081420898438, |
|
"logps/chosen": -146.1105194091797, |
|
"logps/rejected": -136.11727905273438, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.551005482673645, |
|
"rewards/margins": 0.2657201588153839, |
|
"rewards/rejected": -0.8167255520820618, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2708024172665795e-06, |
|
"logits/chosen": -2.402360439300537, |
|
"logits/rejected": -2.2175159454345703, |
|
"logps/chosen": -150.5807342529297, |
|
"logps/rejected": -125.05631256103516, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7037911415100098, |
|
"rewards/margins": 0.3860488533973694, |
|
"rewards/rejected": -1.089840054512024, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.209111411211174e-06, |
|
"logits/chosen": -2.413839340209961, |
|
"logits/rejected": -2.253542423248291, |
|
"logps/chosen": -142.4720001220703, |
|
"logps/rejected": -127.50254821777344, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7327712178230286, |
|
"rewards/margins": 0.4077509045600891, |
|
"rewards/rejected": -1.1405221223831177, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.145401411426788e-06, |
|
"logits/chosen": -2.4574408531188965, |
|
"logits/rejected": -2.3272013664245605, |
|
"logps/chosen": -143.1080322265625, |
|
"logps/rejected": -130.01637268066406, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7288345098495483, |
|
"rewards/margins": 0.36709967255592346, |
|
"rewards/rejected": -1.0959341526031494, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.079747679297314e-06, |
|
"logits/chosen": -2.295055627822876, |
|
"logits/rejected": -2.179664134979248, |
|
"logps/chosen": -157.7494354248047, |
|
"logps/rejected": -137.73941040039062, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8279803395271301, |
|
"rewards/margins": 0.3018009066581726, |
|
"rewards/rejected": -1.1297812461853027, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.012227772360889e-06, |
|
"logits/chosen": -2.2948107719421387, |
|
"logits/rejected": -2.1263232231140137, |
|
"logps/chosen": -167.21339416503906, |
|
"logps/rejected": -150.82669067382812, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9286147356033325, |
|
"rewards/margins": 0.44435009360313416, |
|
"rewards/rejected": -1.3729647397994995, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.942921452690245e-06, |
|
"logits/chosen": -2.3513195514678955, |
|
"logits/rejected": -2.25927734375, |
|
"logps/chosen": -173.61428833007812, |
|
"logps/rejected": -163.9345703125, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3186113834381104, |
|
"rewards/margins": 0.24015231430530548, |
|
"rewards/rejected": -1.558763861656189, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.871910592668817e-06, |
|
"logits/chosen": -2.433640480041504, |
|
"logits/rejected": -2.300931215286255, |
|
"logps/chosen": -170.8921356201172, |
|
"logps/rejected": -165.83462524414062, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.420196294784546, |
|
"rewards/margins": 0.5389910340309143, |
|
"rewards/rejected": -1.9591872692108154, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.799279078273921e-06, |
|
"logits/chosen": -2.3466134071350098, |
|
"logits/rejected": -2.157196044921875, |
|
"logps/chosen": -177.853271484375, |
|
"logps/rejected": -158.73776245117188, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5086350440979004, |
|
"rewards/margins": 0.6321147680282593, |
|
"rewards/rejected": -2.140749454498291, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -2.195096015930176, |
|
"eval_logits/rejected": -2.0950398445129395, |
|
"eval_logps/chosen": -177.19459533691406, |
|
"eval_logps/rejected": -168.9884490966797, |
|
"eval_loss": 0.5550708174705505, |
|
"eval_rewards/accuracies": 0.6957547068595886, |
|
"eval_rewards/chosen": -1.6581227779388428, |
|
"eval_rewards/margins": 0.4946018159389496, |
|
"eval_rewards/rejected": -2.152724504470825, |
|
"eval_runtime": 417.866, |
|
"eval_samples_per_second": 1.0, |
|
"eval_steps_per_second": 0.127, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.725112709981249e-06, |
|
"logits/chosen": -2.20538592338562, |
|
"logits/rejected": -2.059528112411499, |
|
"logps/chosen": -192.91726684570312, |
|
"logps/rejected": -185.90341186523438, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8029924631118774, |
|
"rewards/margins": 0.5775827765464783, |
|
"rewards/rejected": -2.38057541847229, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.649499101407737e-06, |
|
"logits/chosen": -2.363370895385742, |
|
"logits/rejected": -2.2048511505126953, |
|
"logps/chosen": -224.8810577392578, |
|
"logps/rejected": -214.8511505126953, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.7866973876953125, |
|
"rewards/margins": 0.5168424844741821, |
|
"rewards/rejected": -3.303539991378784, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5725275758125564e-06, |
|
"logits/chosen": -2.27677059173584, |
|
"logits/rejected": -2.1246485710144043, |
|
"logps/chosen": -200.77066040039062, |
|
"logps/rejected": -190.68533325195312, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.6391139030456543, |
|
"rewards/margins": 0.7239618897438049, |
|
"rewards/rejected": -3.3630757331848145, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.494289060578478e-06, |
|
"logits/chosen": -2.3822944164276123, |
|
"logits/rejected": -2.2915594577789307, |
|
"logps/chosen": -169.66128540039062, |
|
"logps/rejected": -174.19607543945312, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6458194255828857, |
|
"rewards/margins": 0.7625138163566589, |
|
"rewards/rejected": -2.4083335399627686, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.414875979798272e-06, |
|
"logits/chosen": -2.327730178833008, |
|
"logits/rejected": -2.2062036991119385, |
|
"logps/chosen": -194.1064453125, |
|
"logps/rejected": -193.63107299804688, |
|
"loss": 0.4394, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.4300081729888916, |
|
"rewards/margins": 0.892192006111145, |
|
"rewards/rejected": -2.322200059890747, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3343821450930196e-06, |
|
"logits/chosen": -2.454336166381836, |
|
"logits/rejected": -2.2937960624694824, |
|
"logps/chosen": -201.33657836914062, |
|
"logps/rejected": -188.39266967773438, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0642800331115723, |
|
"rewards/margins": 0.8783187866210938, |
|
"rewards/rejected": -2.942598819732666, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.252902644791325e-06, |
|
"logits/chosen": -2.188424587249756, |
|
"logits/rejected": -2.041105031967163, |
|
"logps/chosen": -214.7936248779297, |
|
"logps/rejected": -222.3824462890625, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7383971214294434, |
|
"rewards/margins": 0.9385073781013489, |
|
"rewards/rejected": -3.6769042015075684, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.170533731600339e-06, |
|
"logits/chosen": -2.3067922592163086, |
|
"logits/rejected": -2.1540229320526123, |
|
"logps/chosen": -204.9595184326172, |
|
"logps/rejected": -204.6565704345703, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3199477195739746, |
|
"rewards/margins": 0.8953973650932312, |
|
"rewards/rejected": -3.2153449058532715, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0873727089012816e-06, |
|
"logits/chosen": -2.4507811069488525, |
|
"logits/rejected": -2.357919931411743, |
|
"logps/chosen": -212.8636932373047, |
|
"logps/rejected": -208.5207977294922, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4198460578918457, |
|
"rewards/margins": 0.7457529306411743, |
|
"rewards/rejected": -3.1655986309051514, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0035178158038026e-06, |
|
"logits/chosen": -2.2298638820648193, |
|
"logits/rejected": -2.035947322845459, |
|
"logps/chosen": -217.26846313476562, |
|
"logps/rejected": -209.74526977539062, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4076380729675293, |
|
"rewards/margins": 0.9488040208816528, |
|
"rewards/rejected": -3.3564422130584717, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.919068111094937e-06, |
|
"logits/chosen": -2.3187146186828613, |
|
"logits/rejected": -2.193861484527588, |
|
"logps/chosen": -185.0901641845703, |
|
"logps/rejected": -195.90634155273438, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.132998466491699, |
|
"rewards/margins": 0.827830970287323, |
|
"rewards/rejected": -2.960829257965088, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8341233562197895e-06, |
|
"logits/chosen": -2.3116376399993896, |
|
"logits/rejected": -2.246950626373291, |
|
"logps/chosen": -172.0282440185547, |
|
"logps/rejected": -177.47348022460938, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7447055578231812, |
|
"rewards/margins": 0.754051685333252, |
|
"rewards/rejected": -2.4987568855285645, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7487838974321352e-06, |
|
"logits/chosen": -2.2577805519104004, |
|
"logits/rejected": -2.143658399581909, |
|
"logps/chosen": -178.53317260742188, |
|
"logps/rejected": -182.5507049560547, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.789046287536621, |
|
"rewards/margins": 0.8771921396255493, |
|
"rewards/rejected": -2.666238307952881, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6631505472541997e-06, |
|
"logits/chosen": -2.2621216773986816, |
|
"logits/rejected": -2.1167335510253906, |
|
"logps/chosen": -193.0259246826172, |
|
"logps/rejected": -198.51705932617188, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.8945804834365845, |
|
"rewards/margins": 1.0066120624542236, |
|
"rewards/rejected": -2.9011926651000977, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5773244653856173e-06, |
|
"logits/chosen": -2.2354609966278076, |
|
"logits/rejected": -2.1045310497283936, |
|
"logps/chosen": -205.27163696289062, |
|
"logps/rejected": -207.8455810546875, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.161865711212158, |
|
"rewards/margins": 0.9317368268966675, |
|
"rewards/rejected": -3.0936026573181152, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4914070392022717e-06, |
|
"logits/chosen": -2.274534225463867, |
|
"logits/rejected": -2.158811569213867, |
|
"logps/chosen": -208.08349609375, |
|
"logps/rejected": -211.2762451171875, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0876078605651855, |
|
"rewards/margins": 0.8888559341430664, |
|
"rewards/rejected": -2.976463794708252, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4054997639861778e-06, |
|
"logits/chosen": -2.1874241828918457, |
|
"logits/rejected": -2.0126781463623047, |
|
"logps/chosen": -207.1369171142578, |
|
"logps/rejected": -207.59768676757812, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0929489135742188, |
|
"rewards/margins": 1.14609694480896, |
|
"rewards/rejected": -3.2390456199645996, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3197041230278905e-06, |
|
"logits/chosen": -2.3066487312316895, |
|
"logits/rejected": -2.18741512298584, |
|
"logps/chosen": -204.58642578125, |
|
"logps/rejected": -222.44973754882812, |
|
"loss": 0.427, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.2493700981140137, |
|
"rewards/margins": 1.2541824579238892, |
|
"rewards/rejected": -3.5035526752471924, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.234121467743082e-06, |
|
"logits/chosen": -2.3349661827087402, |
|
"logits/rejected": -2.252894163131714, |
|
"logps/chosen": -208.3444061279297, |
|
"logps/rejected": -211.37295532226562, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.635042905807495, |
|
"rewards/margins": 0.8392454981803894, |
|
"rewards/rejected": -3.4742884635925293, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.148852897944905e-06, |
|
"logits/chosen": -2.3977198600769043, |
|
"logits/rejected": -2.2428812980651855, |
|
"logps/chosen": -208.42538452148438, |
|
"logps/rejected": -219.8905487060547, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.645089626312256, |
|
"rewards/margins": 1.1129385232925415, |
|
"rewards/rejected": -3.758028030395508, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": -2.17734956741333, |
|
"eval_logits/rejected": -2.078249454498291, |
|
"eval_logps/chosen": -237.10446166992188, |
|
"eval_logps/rejected": -241.38665771484375, |
|
"eval_loss": 0.5327094793319702, |
|
"eval_rewards/accuracies": 0.724056601524353, |
|
"eval_rewards/chosen": -3.455418348312378, |
|
"eval_rewards/margins": 0.8692519068717957, |
|
"eval_rewards/rejected": -4.324670314788818, |
|
"eval_runtime": 423.7708, |
|
"eval_samples_per_second": 0.986, |
|
"eval_steps_per_second": 0.125, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.063999142413574e-06, |
|
"logits/chosen": -2.2915334701538086, |
|
"logits/rejected": -2.1494946479797363, |
|
"logps/chosen": -247.0482177734375, |
|
"logps/rejected": -239.52462768554688, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.5384364128112793, |
|
"rewards/margins": 0.9590060114860535, |
|
"rewards/rejected": -4.497443199157715, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9796604399042547e-06, |
|
"logits/chosen": -2.3757712841033936, |
|
"logits/rejected": -2.2203211784362793, |
|
"logps/chosen": -268.9779052734375, |
|
"logps/rejected": -274.21771240234375, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.319927215576172, |
|
"rewards/margins": 1.1538012027740479, |
|
"rewards/rejected": -5.473728179931641, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8959364207338216e-06, |
|
"logits/chosen": -2.345416784286499, |
|
"logits/rejected": -2.180387020111084, |
|
"logps/chosen": -245.0520477294922, |
|
"logps/rejected": -258.79498291015625, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.156452655792236, |
|
"rewards/margins": 1.1458499431610107, |
|
"rewards/rejected": -5.302302360534668, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8129259890863825e-06, |
|
"logits/chosen": -2.3113701343536377, |
|
"logits/rejected": -2.200329303741455, |
|
"logps/chosen": -258.2785949707031, |
|
"logps/rejected": -273.8367614746094, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.8280506134033203, |
|
"rewards/margins": 1.0454813241958618, |
|
"rewards/rejected": -4.873531818389893, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7307272061765738e-06, |
|
"logits/chosen": -2.332291841506958, |
|
"logits/rejected": -2.238374948501587, |
|
"logps/chosen": -245.2693328857422, |
|
"logps/rejected": -258.13922119140625, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.4873454570770264, |
|
"rewards/margins": 1.1437709331512451, |
|
"rewards/rejected": -4.6311163902282715, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.649437174408685e-06, |
|
"logits/chosen": -2.2653393745422363, |
|
"logits/rejected": -2.1328892707824707, |
|
"logps/chosen": -225.3964385986328, |
|
"logps/rejected": -243.22384643554688, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7976624965667725, |
|
"rewards/margins": 1.2127068042755127, |
|
"rewards/rejected": -4.010369300842285, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.569151922668422e-06, |
|
"logits/chosen": -2.367928981781006, |
|
"logits/rejected": -2.253605365753174, |
|
"logps/chosen": -201.37025451660156, |
|
"logps/rejected": -218.9671630859375, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.52730131149292, |
|
"rewards/margins": 1.1274640560150146, |
|
"rewards/rejected": -3.6547648906707764, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4899662928828428e-06, |
|
"logits/chosen": -2.309407949447632, |
|
"logits/rejected": -2.1246845722198486, |
|
"logps/chosen": -186.25376892089844, |
|
"logps/rejected": -205.3202667236328, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.198615550994873, |
|
"rewards/margins": 1.3432915210723877, |
|
"rewards/rejected": -3.5419068336486816, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4119738279824507e-06, |
|
"logits/chosen": -2.233764886856079, |
|
"logits/rejected": -2.0911850929260254, |
|
"logps/chosen": -194.0051727294922, |
|
"logps/rejected": -202.19174194335938, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.272157907485962, |
|
"rewards/margins": 0.9171239137649536, |
|
"rewards/rejected": -3.189281463623047, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3352666613978152e-06, |
|
"logits/chosen": -2.1566410064697266, |
|
"logits/rejected": -2.075209379196167, |
|
"logps/chosen": -198.47071838378906, |
|
"logps/rejected": -202.29379272460938, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.325533151626587, |
|
"rewards/margins": 0.8646724820137024, |
|
"rewards/rejected": -3.1902058124542236, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2599354082212523e-06, |
|
"logits/chosen": -2.222766876220703, |
|
"logits/rejected": -2.0928432941436768, |
|
"logps/chosen": -190.0797119140625, |
|
"logps/rejected": -198.69985961914062, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1240830421447754, |
|
"rewards/margins": 0.9723888635635376, |
|
"rewards/rejected": -3.0964715480804443, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.186069058162127e-06, |
|
"logits/chosen": -2.3559908866882324, |
|
"logits/rejected": -2.249436140060425, |
|
"logps/chosen": -197.04818725585938, |
|
"logps/rejected": -201.38693237304688, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.438502788543701, |
|
"rewards/margins": 0.8104500770568848, |
|
"rewards/rejected": -3.248952865600586, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.113754870422254e-06, |
|
"logits/chosen": -2.40106201171875, |
|
"logits/rejected": -2.2911598682403564, |
|
"logps/chosen": -198.04409790039062, |
|
"logps/rejected": -214.9007568359375, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3586318492889404, |
|
"rewards/margins": 0.8204809427261353, |
|
"rewards/rejected": -3.179112672805786, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0430782706155545e-06, |
|
"logits/chosen": -2.4164676666259766, |
|
"logits/rejected": -2.2673213481903076, |
|
"logps/chosen": -204.253173828125, |
|
"logps/rejected": -209.43753051757812, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2196271419525146, |
|
"rewards/margins": 1.149065613746643, |
|
"rewards/rejected": -3.3686928749084473, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.741227498537615e-07, |
|
"logits/chosen": -2.457432270050049, |
|
"logits/rejected": -2.321898937225342, |
|
"logps/chosen": -194.0103302001953, |
|
"logps/rejected": -204.46145629882812, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4977972507476807, |
|
"rewards/margins": 1.0296275615692139, |
|
"rewards/rejected": -3.5274243354797363, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.069697661173668e-07, |
|
"logits/chosen": -2.3087573051452637, |
|
"logits/rejected": -2.210942268371582, |
|
"logps/chosen": -205.71200561523438, |
|
"logps/rejected": -217.53921508789062, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.667379379272461, |
|
"rewards/margins": 0.8116022348403931, |
|
"rewards/rejected": -3.4789810180664062, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.416986480283434e-07, |
|
"logits/chosen": -2.2393643856048584, |
|
"logits/rejected": -2.096648693084717, |
|
"logps/chosen": -205.7020721435547, |
|
"logps/rejected": -220.5606689453125, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.711803913116455, |
|
"rewards/margins": 1.0723652839660645, |
|
"rewards/rejected": -3.7841694355010986, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.783865011382876e-07, |
|
"logits/chosen": -2.2971372604370117, |
|
"logits/rejected": -2.1558918952941895, |
|
"logps/chosen": -213.41958618164062, |
|
"logps/rejected": -214.00326538085938, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.363002300262451, |
|
"rewards/margins": 1.088847279548645, |
|
"rewards/rejected": -3.4518496990203857, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.171081168427205e-07, |
|
"logits/chosen": -2.331343173980713, |
|
"logits/rejected": -2.165480613708496, |
|
"logps/chosen": -221.4240264892578, |
|
"logps/rejected": -226.91650390625, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2688238620758057, |
|
"rewards/margins": 1.2109057903289795, |
|
"rewards/rejected": -3.479729413986206, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.579358840291064e-07, |
|
"logits/chosen": -2.2774598598480225, |
|
"logits/rejected": -2.1744918823242188, |
|
"logps/chosen": -209.7314453125, |
|
"logps/rejected": -224.2617645263672, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5639281272888184, |
|
"rewards/margins": 0.7768001556396484, |
|
"rewards/rejected": -3.340728282928467, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -2.159109354019165, |
|
"eval_logits/rejected": -2.058640241622925, |
|
"eval_logps/chosen": -205.4982452392578, |
|
"eval_logps/rejected": -212.2717742919922, |
|
"eval_loss": 0.5206710696220398, |
|
"eval_rewards/accuracies": 0.7334905862808228, |
|
"eval_rewards/chosen": -2.507232666015625, |
|
"eval_rewards/margins": 0.9439911842346191, |
|
"eval_rewards/rejected": -3.4512243270874023, |
|
"eval_runtime": 423.3516, |
|
"eval_samples_per_second": 0.987, |
|
"eval_steps_per_second": 0.125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.00939703563006e-07, |
|
"logits/chosen": -2.3462395668029785, |
|
"logits/rejected": -2.2609333992004395, |
|
"logps/chosen": -191.94070434570312, |
|
"logps/rejected": -208.1775665283203, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5009541511535645, |
|
"rewards/margins": 0.8846192359924316, |
|
"rewards/rejected": -3.385573625564575, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.461869057133412e-07, |
|
"logits/chosen": -2.386518955230713, |
|
"logits/rejected": -2.2898330688476562, |
|
"logps/chosen": -198.1256866455078, |
|
"logps/rejected": -216.5961456298828, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2799556255340576, |
|
"rewards/margins": 1.0353368520736694, |
|
"rewards/rejected": -3.3152928352355957, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.937421706143497e-07, |
|
"logits/chosen": -2.2601094245910645, |
|
"logits/rejected": -2.1199710369110107, |
|
"logps/chosen": -208.54898071289062, |
|
"logps/rejected": -210.0823974609375, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4491305351257324, |
|
"rewards/margins": 0.9635556936264038, |
|
"rewards/rejected": -3.412686586380005, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.43667451858166e-07, |
|
"logits/chosen": -2.262453079223633, |
|
"logits/rejected": -2.0967936515808105, |
|
"logps/chosen": -188.5206298828125, |
|
"logps/rejected": -202.87240600585938, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.2963976860046387, |
|
"rewards/margins": 1.180870771408081, |
|
"rewards/rejected": -3.4772682189941406, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9602190330830484e-07, |
|
"logits/chosen": -2.2342655658721924, |
|
"logits/rejected": -2.130017042160034, |
|
"logps/chosen": -200.30343627929688, |
|
"logps/rejected": -220.6678466796875, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.533165454864502, |
|
"rewards/margins": 1.0824253559112549, |
|
"rewards/rejected": -3.6155905723571777, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5086180922049295e-07, |
|
"logits/chosen": -2.3764655590057373, |
|
"logits/rejected": -2.235109329223633, |
|
"logps/chosen": -224.97146606445312, |
|
"logps/rejected": -221.2453155517578, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4995951652526855, |
|
"rewards/margins": 1.0894619226455688, |
|
"rewards/rejected": -3.589057445526123, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0824051775340895e-07, |
|
"logits/chosen": -2.380509853363037, |
|
"logits/rejected": -2.275191068649292, |
|
"logps/chosen": -180.95895385742188, |
|
"logps/rejected": -195.82479858398438, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.3850598335266113, |
|
"rewards/margins": 0.8363308906555176, |
|
"rewards/rejected": -3.22139048576355, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6820837794786336e-07, |
|
"logits/chosen": -2.2321319580078125, |
|
"logits/rejected": -2.1590046882629395, |
|
"logps/chosen": -204.28173828125, |
|
"logps/rejected": -214.01327514648438, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.5836868286132812, |
|
"rewards/margins": 0.4722086787223816, |
|
"rewards/rejected": -3.0558953285217285, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.3081268024887694e-07, |
|
"logits/chosen": -2.222381114959717, |
|
"logits/rejected": -2.051706075668335, |
|
"logps/chosen": -199.63970947265625, |
|
"logps/rejected": -208.906005859375, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.3457841873168945, |
|
"rewards/margins": 1.2535767555236816, |
|
"rewards/rejected": -3.599360942840576, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9609760064091044e-07, |
|
"logits/chosen": -2.3242409229278564, |
|
"logits/rejected": -2.2560477256774902, |
|
"logps/chosen": -205.27554321289062, |
|
"logps/rejected": -201.7273712158203, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.428300142288208, |
|
"rewards/margins": 0.8561038970947266, |
|
"rewards/rejected": -3.2844040393829346, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6410414846224992e-07, |
|
"logits/chosen": -2.220360517501831, |
|
"logits/rejected": -2.109575033187866, |
|
"logps/chosen": -201.75045776367188, |
|
"logps/rejected": -214.3638458251953, |
|
"loss": 0.4565, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.569467067718506, |
|
"rewards/margins": 1.1193673610687256, |
|
"rewards/rejected": -3.6888339519500732, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.348701179601819e-07, |
|
"logits/chosen": -2.401984691619873, |
|
"logits/rejected": -2.2650654315948486, |
|
"logps/chosen": -215.353515625, |
|
"logps/rejected": -225.1014404296875, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.38716983795166, |
|
"rewards/margins": 1.1682064533233643, |
|
"rewards/rejected": -3.5553765296936035, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0843004364420151e-07, |
|
"logits/chosen": -2.2123489379882812, |
|
"logits/rejected": -2.100048065185547, |
|
"logps/chosen": -215.25479125976562, |
|
"logps/rejected": -229.445556640625, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.393723964691162, |
|
"rewards/margins": 0.8360812067985535, |
|
"rewards/rejected": -3.2298049926757812, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.481515948997931e-08, |
|
"logits/chosen": -2.3680367469787598, |
|
"logits/rejected": -2.26953387260437, |
|
"logps/chosen": -216.99514770507812, |
|
"logps/rejected": -212.17263793945312, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.478694200515747, |
|
"rewards/margins": 0.7769169807434082, |
|
"rewards/rejected": -3.255610942840576, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.4053362042297e-08, |
|
"logits/chosen": -2.2625975608825684, |
|
"logits/rejected": -2.104025363922119, |
|
"logps/chosen": -205.9175262451172, |
|
"logps/rejected": -217.52304077148438, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4613893032073975, |
|
"rewards/margins": 1.17227303981781, |
|
"rewards/rejected": -3.633662462234497, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.616917746052163e-08, |
|
"logits/chosen": -2.3516554832458496, |
|
"logits/rejected": -2.214130401611328, |
|
"logps/chosen": -203.63438415527344, |
|
"logps/rejected": -211.27334594726562, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5478570461273193, |
|
"rewards/margins": 0.990521252155304, |
|
"rewards/rejected": -3.5383784770965576, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.118373254556412e-08, |
|
"logits/chosen": -2.3878164291381836, |
|
"logits/rejected": -2.2502613067626953, |
|
"logps/chosen": -198.12130737304688, |
|
"logps/rejected": -199.36111450195312, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.448812484741211, |
|
"rewards/margins": 1.0348026752471924, |
|
"rewards/rejected": -3.483614683151245, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9114729782535037e-08, |
|
"logits/chosen": -2.4108872413635254, |
|
"logits/rejected": -2.297142505645752, |
|
"logps/chosen": -194.57632446289062, |
|
"logps/rejected": -202.7489776611328, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.350036859512329, |
|
"rewards/margins": 0.9002410173416138, |
|
"rewards/rejected": -3.2502777576446533, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.97642642858815e-09, |
|
"logits/chosen": -2.28024959564209, |
|
"logits/rejected": -2.139801502227783, |
|
"logps/chosen": -211.3317413330078, |
|
"logps/rejected": -211.1747283935547, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.55255126953125, |
|
"rewards/margins": 0.9248201251029968, |
|
"rewards/rejected": -3.4773712158203125, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.779617670651436e-09, |
|
"logits/chosen": -2.248671054840088, |
|
"logits/rejected": -2.1480166912078857, |
|
"logps/chosen": -217.71707153320312, |
|
"logps/rejected": -225.20040893554688, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.73987078666687, |
|
"rewards/margins": 0.9156008958816528, |
|
"rewards/rejected": -3.6554713249206543, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -2.1620335578918457, |
|
"eval_logits/rejected": -2.062356948852539, |
|
"eval_logps/chosen": -206.584716796875, |
|
"eval_logps/rejected": -214.93492126464844, |
|
"eval_loss": 0.521207869052887, |
|
"eval_rewards/accuracies": 0.7405660152435303, |
|
"eval_rewards/chosen": -2.5398268699645996, |
|
"eval_rewards/margins": 0.9912916421890259, |
|
"eval_rewards/rejected": -3.531118631362915, |
|
"eval_runtime": 423.4333, |
|
"eval_samples_per_second": 0.987, |
|
"eval_steps_per_second": 0.125, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.316238729444201e-10, |
|
"logits/chosen": -2.234221935272217, |
|
"logits/rejected": -2.0627448558807373, |
|
"logps/chosen": -209.44503784179688, |
|
"logps/rejected": -219.24606323242188, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.601386547088623, |
|
"rewards/margins": 1.190915584564209, |
|
"rewards/rejected": -3.792301893234253, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1016, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5583291621658746, |
|
"train_runtime": 16785.8838, |
|
"train_samples_per_second": 0.484, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1016, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|