|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 4.854994271032306, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.3870464563369751, |
|
"logits/rejected": -0.3449973464012146, |
|
"logps/chosen": -161.37554931640625, |
|
"logps/rejected": -150.78668212890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 5.007999188048557, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.38755929470062256, |
|
"logits/rejected": -0.40367352962493896, |
|
"logps/chosen": -389.556640625, |
|
"logps/rejected": -313.19439697265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.000382223108317703, |
|
"rewards/margins": 0.000817837193608284, |
|
"rewards/rejected": -0.0004356140270829201, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 5.4251682517128685, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.40382856130599976, |
|
"logits/rejected": -0.4116736352443695, |
|
"logps/chosen": -253.2971649169922, |
|
"logps/rejected": -222.39187622070312, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0011010636808350682, |
|
"rewards/margins": 0.0007788551738485694, |
|
"rewards/rejected": -0.0018799189710989594, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 4.855409710988802, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.37188905477523804, |
|
"logits/rejected": -0.41493088006973267, |
|
"logps/chosen": -264.1092834472656, |
|
"logps/rejected": -276.79327392578125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0006352605996653438, |
|
"rewards/margins": 0.0009311493486166, |
|
"rewards/rejected": -0.001566409831866622, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 4.773650762501925, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.4388062357902527, |
|
"logits/rejected": -0.4551084041595459, |
|
"logps/chosen": -283.5164489746094, |
|
"logps/rejected": -264.680419921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -6.967745866859332e-05, |
|
"rewards/margins": 0.0002998415438923985, |
|
"rewards/rejected": -0.0003695189952850342, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 5.249008388011441, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.44028440117836, |
|
"logits/rejected": -0.41670793294906616, |
|
"logps/chosen": -289.8959045410156, |
|
"logps/rejected": -249.32876586914062, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00012158080789959058, |
|
"rewards/margins": 0.0013795382110401988, |
|
"rewards/rejected": -0.0012579575413838029, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 5.451149941549421, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -0.41042596101760864, |
|
"logits/rejected": -0.39021745324134827, |
|
"logps/chosen": -293.70989990234375, |
|
"logps/rejected": -274.8219909667969, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.00042934497469104826, |
|
"rewards/margins": 5.6701617722865194e-05, |
|
"rewards/rejected": 0.00037264340789988637, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 4.9176821013212955, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.37676185369491577, |
|
"logits/rejected": -0.3675565719604492, |
|
"logps/chosen": -300.6470031738281, |
|
"logps/rejected": -285.7118225097656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0005408605793491006, |
|
"rewards/margins": -0.0005098087713122368, |
|
"rewards/rejected": -3.105172800133005e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 5.324153930467038, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -0.3833390474319458, |
|
"logits/rejected": -0.3485874533653259, |
|
"logps/chosen": -202.55172729492188, |
|
"logps/rejected": -266.27801513671875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0002449182793498039, |
|
"rewards/margins": -0.00045424007112160325, |
|
"rewards/rejected": 0.00020932205370627344, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 4.902493446985089, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -0.42354243993759155, |
|
"logits/rejected": -0.4241662621498108, |
|
"logps/chosen": -345.2079162597656, |
|
"logps/rejected": -297.83392333984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.00027865776792168617, |
|
"rewards/margins": -7.414491847157478e-05, |
|
"rewards/rejected": -0.00020451273303478956, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 5.335283741962904, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.3970239758491516, |
|
"logits/rejected": -0.36344924569129944, |
|
"logps/chosen": -279.8683166503906, |
|
"logps/rejected": -301.5334167480469, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0004452617431525141, |
|
"rewards/margins": 0.0009495856938883662, |
|
"rewards/rejected": -0.0005043239216320217, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 4.56016585237767, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.3945187032222748, |
|
"logits/rejected": -0.40393415093421936, |
|
"logps/chosen": -245.4661102294922, |
|
"logps/rejected": -244.88955688476562, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0013859450118616223, |
|
"rewards/margins": -9.293450420955196e-05, |
|
"rewards/rejected": -0.0012930103112012148, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 5.117639055978934, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.3993036150932312, |
|
"logits/rejected": -0.413900762796402, |
|
"logps/chosen": -301.570068359375, |
|
"logps/rejected": -287.91998291015625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0009120000759139657, |
|
"rewards/margins": 0.002243091817945242, |
|
"rewards/rejected": -0.003155091777443886, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 4.533876465192458, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -0.40214699506759644, |
|
"logits/rejected": -0.4113968014717102, |
|
"logps/chosen": -219.7006072998047, |
|
"logps/rejected": -321.1871337890625, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0008342149667441845, |
|
"rewards/margins": 0.0029901477973908186, |
|
"rewards/rejected": -0.0038243632297962904, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 5.008152220168436, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.33501917123794556, |
|
"logits/rejected": -0.3522827625274658, |
|
"logps/chosen": -314.4662780761719, |
|
"logps/rejected": -304.34869384765625, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0010583497351035476, |
|
"rewards/margins": 0.004176832735538483, |
|
"rewards/rejected": -0.005235183052718639, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 5.230326203624785, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.3923170566558838, |
|
"logits/rejected": -0.3953098952770233, |
|
"logps/chosen": -236.24685668945312, |
|
"logps/rejected": -234.350830078125, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0009860789868980646, |
|
"rewards/margins": 0.004025029484182596, |
|
"rewards/rejected": -0.0050111087039113045, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 5.013822574107228, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.31543251872062683, |
|
"logits/rejected": -0.3109430968761444, |
|
"logps/chosen": -316.65985107421875, |
|
"logps/rejected": -250.0377960205078, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0033806234132498503, |
|
"rewards/margins": 0.0072640664875507355, |
|
"rewards/rejected": -0.010644689202308655, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 4.673205835529891, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.3539220094680786, |
|
"logits/rejected": -0.3610088527202606, |
|
"logps/chosen": -352.55316162109375, |
|
"logps/rejected": -340.3455810546875, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0013684859732165933, |
|
"rewards/margins": 0.009599483571946621, |
|
"rewards/rejected": -0.01096796989440918, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 5.280440292000614, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -0.414604127407074, |
|
"logits/rejected": -0.40983182191848755, |
|
"logps/chosen": -251.31930541992188, |
|
"logps/rejected": -246.71249389648438, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0037162289954721928, |
|
"rewards/margins": 0.016576949506998062, |
|
"rewards/rejected": -0.020293179899454117, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 6.110987926729073, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.37201982736587524, |
|
"logits/rejected": -0.3684994578361511, |
|
"logps/chosen": -334.75994873046875, |
|
"logps/rejected": -276.002197265625, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0038164921570569277, |
|
"rewards/margins": 0.0039043619763106108, |
|
"rewards/rejected": -0.0077208541333675385, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 4.693128517095156, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.3985927700996399, |
|
"logits/rejected": -0.3652064800262451, |
|
"logps/chosen": -327.9397888183594, |
|
"logps/rejected": -314.77703857421875, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0021692051086574793, |
|
"rewards/margins": 0.02031012810766697, |
|
"rewards/rejected": -0.022479332983493805, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 4.82808785130016, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.416436105966568, |
|
"logits/rejected": -0.42406004667282104, |
|
"logps/chosen": -256.06634521484375, |
|
"logps/rejected": -279.0437316894531, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.006927967071533203, |
|
"rewards/margins": 0.01621558703482151, |
|
"rewards/rejected": -0.023143552243709564, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 4.930218028749106, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.4129602015018463, |
|
"logits/rejected": -0.42116695642471313, |
|
"logps/chosen": -326.42987060546875, |
|
"logps/rejected": -336.3708801269531, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.011486181057989597, |
|
"rewards/margins": 0.01089237816631794, |
|
"rewards/rejected": -0.02237856015563011, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 5.209995561674771, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.39636367559432983, |
|
"logits/rejected": -0.37121134996414185, |
|
"logps/chosen": -249.68606567382812, |
|
"logps/rejected": -287.6152038574219, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.014097055420279503, |
|
"rewards/margins": 0.01727898046374321, |
|
"rewards/rejected": -0.031376034021377563, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 5.637285290487847, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.3950192332267761, |
|
"logits/rejected": -0.38908690214157104, |
|
"logps/chosen": -302.5347595214844, |
|
"logps/rejected": -257.3473205566406, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.006957252975553274, |
|
"rewards/margins": 0.02547440305352211, |
|
"rewards/rejected": -0.03243165463209152, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 5.523154228755914, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.3644653558731079, |
|
"logits/rejected": -0.35463160276412964, |
|
"logps/chosen": -244.20401000976562, |
|
"logps/rejected": -235.6724395751953, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01351804006844759, |
|
"rewards/margins": 0.01623005047440529, |
|
"rewards/rejected": -0.029748091474175453, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 4.946428082161042, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.44501185417175293, |
|
"logits/rejected": -0.39841514825820923, |
|
"logps/chosen": -285.33624267578125, |
|
"logps/rejected": -289.7425842285156, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.024796243757009506, |
|
"rewards/margins": 0.03235085308551788, |
|
"rewards/rejected": -0.05714709684252739, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 5.212561603527658, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.3279297947883606, |
|
"logits/rejected": -0.38405635952949524, |
|
"logps/chosen": -295.61566162109375, |
|
"logps/rejected": -237.0383758544922, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.021820012480020523, |
|
"rewards/margins": 0.008004983887076378, |
|
"rewards/rejected": -0.029824992641806602, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 5.188924605918914, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -0.3912425935268402, |
|
"logits/rejected": -0.38816994428634644, |
|
"logps/chosen": -309.39617919921875, |
|
"logps/rejected": -302.08001708984375, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.01541377604007721, |
|
"rewards/margins": 0.04586619883775711, |
|
"rewards/rejected": -0.06127997115254402, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 4.7601490852750326, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.3547287583351135, |
|
"logits/rejected": -0.32382625341415405, |
|
"logps/chosen": -301.15460205078125, |
|
"logps/rejected": -276.5646667480469, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.021407341584563255, |
|
"rewards/margins": 0.03352125734090805, |
|
"rewards/rejected": -0.05492859333753586, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 5.589212470867438, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.4183027744293213, |
|
"logits/rejected": -0.4140304923057556, |
|
"logps/chosen": -278.68707275390625, |
|
"logps/rejected": -302.8797912597656, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.03756223991513252, |
|
"rewards/margins": 0.041960276663303375, |
|
"rewards/rejected": -0.0795225277543068, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 5.107027000775508, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.38045555353164673, |
|
"logits/rejected": -0.3847430646419525, |
|
"logps/chosen": -288.02801513671875, |
|
"logps/rejected": -258.888671875, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.043108247220516205, |
|
"rewards/margins": 0.04828093200922012, |
|
"rewards/rejected": -0.09138917922973633, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 4.7093696590111955, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.3643363118171692, |
|
"logits/rejected": -0.35694578289985657, |
|
"logps/chosen": -284.8294372558594, |
|
"logps/rejected": -257.43133544921875, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04265808314085007, |
|
"rewards/margins": 0.04907030984759331, |
|
"rewards/rejected": -0.09172839671373367, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 4.910514032068699, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.31883081793785095, |
|
"logits/rejected": -0.28674525022506714, |
|
"logps/chosen": -272.1695251464844, |
|
"logps/rejected": -318.9271240234375, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08635957539081573, |
|
"rewards/margins": 0.05848981812596321, |
|
"rewards/rejected": -0.14484938979148865, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 5.0332599907751545, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.3443170189857483, |
|
"logits/rejected": -0.35223323106765747, |
|
"logps/chosen": -254.35726928710938, |
|
"logps/rejected": -281.94195556640625, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.04620979353785515, |
|
"rewards/margins": 0.08755537122488022, |
|
"rewards/rejected": -0.13376514613628387, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 5.508072548843049, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -0.3824247717857361, |
|
"logits/rejected": -0.3841249346733093, |
|
"logps/chosen": -319.01995849609375, |
|
"logps/rejected": -318.2255554199219, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08988650888204575, |
|
"rewards/margins": 0.05337555333971977, |
|
"rewards/rejected": -0.1432620733976364, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 5.385600660244676, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.36046355962753296, |
|
"logits/rejected": -0.3866155743598938, |
|
"logps/chosen": -274.48846435546875, |
|
"logps/rejected": -234.24191284179688, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0927540734410286, |
|
"rewards/margins": 0.045326970517635345, |
|
"rewards/rejected": -0.13808102905750275, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 5.5450235472250675, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.3677825331687927, |
|
"logits/rejected": -0.3560819625854492, |
|
"logps/chosen": -265.2862243652344, |
|
"logps/rejected": -291.4950256347656, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1145174503326416, |
|
"rewards/margins": 0.10159511864185333, |
|
"rewards/rejected": -0.21611256897449493, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 4.681358528597076, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -0.39217817783355713, |
|
"logits/rejected": -0.36524298787117004, |
|
"logps/chosen": -255.4202117919922, |
|
"logps/rejected": -269.22540283203125, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0964360311627388, |
|
"rewards/margins": 0.10729198157787323, |
|
"rewards/rejected": -0.20372800529003143, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 5.3926706949666325, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.31998729705810547, |
|
"logits/rejected": -0.30327945947647095, |
|
"logps/chosen": -294.6556396484375, |
|
"logps/rejected": -271.11346435546875, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11293824017047882, |
|
"rewards/margins": 0.07754337787628174, |
|
"rewards/rejected": -0.19048163294792175, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 5.420438925013907, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.35674285888671875, |
|
"logits/rejected": -0.3888497054576874, |
|
"logps/chosen": -275.8892517089844, |
|
"logps/rejected": -273.18157958984375, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1275942027568817, |
|
"rewards/margins": 0.13519003987312317, |
|
"rewards/rejected": -0.26278427243232727, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 5.2798127437891775, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -0.362439900636673, |
|
"logits/rejected": -0.3545471131801605, |
|
"logps/chosen": -278.9376525878906, |
|
"logps/rejected": -321.1399841308594, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15209965407848358, |
|
"rewards/margins": 0.1249600201845169, |
|
"rewards/rejected": -0.2770597040653229, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 5.259650468657089, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.31682300567626953, |
|
"logits/rejected": -0.3138789236545563, |
|
"logps/chosen": -353.08843994140625, |
|
"logps/rejected": -335.11993408203125, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1913050264120102, |
|
"rewards/margins": 0.15328899025917053, |
|
"rewards/rejected": -0.3445940315723419, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 5.7901843585434305, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -0.4503898620605469, |
|
"logits/rejected": -0.406587690114975, |
|
"logps/chosen": -276.3069152832031, |
|
"logps/rejected": -298.9613342285156, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1920853555202484, |
|
"rewards/margins": 0.21766385436058044, |
|
"rewards/rejected": -0.40974926948547363, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 5.5732351937176325, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.3798277974128723, |
|
"logits/rejected": -0.38922780752182007, |
|
"logps/chosen": -321.9649658203125, |
|
"logps/rejected": -325.6181640625, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.23407897353172302, |
|
"rewards/margins": 0.13595230877399445, |
|
"rewards/rejected": -0.37003129720687866, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 6.182065387597852, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -0.28310176730155945, |
|
"logits/rejected": -0.28306809067726135, |
|
"logps/chosen": -274.2243347167969, |
|
"logps/rejected": -265.6375732421875, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2778427004814148, |
|
"rewards/margins": 0.08695127815008163, |
|
"rewards/rejected": -0.36479395627975464, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 5.712218150903385, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.3742767870426178, |
|
"logits/rejected": -0.3694307804107666, |
|
"logps/chosen": -301.56683349609375, |
|
"logps/rejected": -318.60382080078125, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2279098480939865, |
|
"rewards/margins": 0.23911185562610626, |
|
"rewards/rejected": -0.4670217037200928, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 5.207613785560333, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -0.30902743339538574, |
|
"logits/rejected": -0.3139379322528839, |
|
"logps/chosen": -238.130859375, |
|
"logps/rejected": -261.97259521484375, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21456749737262726, |
|
"rewards/margins": 0.2124086171388626, |
|
"rewards/rejected": -0.42697611451148987, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 6.373579896233462, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -0.44946223497390747, |
|
"logits/rejected": -0.4517344534397125, |
|
"logps/chosen": -263.78277587890625, |
|
"logps/rejected": -343.26947021484375, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2376352846622467, |
|
"rewards/margins": 0.2664358913898468, |
|
"rewards/rejected": -0.5040711164474487, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 6.494273680385882, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.32751840353012085, |
|
"logits/rejected": -0.33484649658203125, |
|
"logps/chosen": -246.5781707763672, |
|
"logps/rejected": -297.93353271484375, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.27829456329345703, |
|
"rewards/margins": 0.44562092423439026, |
|
"rewards/rejected": -0.7239154577255249, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 6.156723989230307, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.3556281626224518, |
|
"logits/rejected": -0.37649574875831604, |
|
"logps/chosen": -335.5538024902344, |
|
"logps/rejected": -312.1348876953125, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4268563687801361, |
|
"rewards/margins": 0.14374002814292908, |
|
"rewards/rejected": -0.5705963373184204, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 5.5439607137629, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -0.39222702383995056, |
|
"logits/rejected": -0.3753407597541809, |
|
"logps/chosen": -280.47393798828125, |
|
"logps/rejected": -329.1144104003906, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24301853775978088, |
|
"rewards/margins": 0.4458102285861969, |
|
"rewards/rejected": -0.6888288259506226, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 6.961165174392256, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.3275012969970703, |
|
"logits/rejected": -0.35214173793792725, |
|
"logps/chosen": -332.95355224609375, |
|
"logps/rejected": -348.1015930175781, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.42038726806640625, |
|
"rewards/margins": 0.20834532380104065, |
|
"rewards/rejected": -0.6287325620651245, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 6.451198360785239, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -0.35043126344680786, |
|
"logits/rejected": -0.3229239583015442, |
|
"logps/chosen": -256.13128662109375, |
|
"logps/rejected": -325.2506408691406, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.35000094771385193, |
|
"rewards/margins": 0.2998683452606201, |
|
"rewards/rejected": -0.6498693227767944, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 5.138402880291277, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -0.2927325963973999, |
|
"logits/rejected": -0.2958449721336365, |
|
"logps/chosen": -296.5050964355469, |
|
"logps/rejected": -315.69061279296875, |
|
"loss": 0.6091, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2853540778160095, |
|
"rewards/margins": 0.2909182012081146, |
|
"rewards/rejected": -0.5762723088264465, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 5.337841343228059, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.2978525757789612, |
|
"logits/rejected": -0.3084144592285156, |
|
"logps/chosen": -360.9128112792969, |
|
"logps/rejected": -391.237060546875, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40219253301620483, |
|
"rewards/margins": 0.2555919587612152, |
|
"rewards/rejected": -0.6577844619750977, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 5.899251959062901, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.44975343346595764, |
|
"logits/rejected": -0.4612964689731598, |
|
"logps/chosen": -247.9381561279297, |
|
"logps/rejected": -239.43057250976562, |
|
"loss": 0.6346, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4517020285129547, |
|
"rewards/margins": 0.14873093366622925, |
|
"rewards/rejected": -0.6004330515861511, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 6.842342229687067, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.3259963393211365, |
|
"logits/rejected": -0.32738104462623596, |
|
"logps/chosen": -363.0848083496094, |
|
"logps/rejected": -425.85650634765625, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.53682541847229, |
|
"rewards/margins": 0.35178542137145996, |
|
"rewards/rejected": -0.88861083984375, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 7.709446348310538, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -0.4362337589263916, |
|
"logits/rejected": -0.4145421087741852, |
|
"logps/chosen": -322.2565002441406, |
|
"logps/rejected": -323.690673828125, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.503047525882721, |
|
"rewards/margins": 0.2108907401561737, |
|
"rewards/rejected": -0.7139382362365723, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 7.389107341643373, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.3247602880001068, |
|
"logits/rejected": -0.33087000250816345, |
|
"logps/chosen": -276.3142395019531, |
|
"logps/rejected": -307.705078125, |
|
"loss": 0.6167, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4834076464176178, |
|
"rewards/margins": 0.29755571484565735, |
|
"rewards/rejected": -0.7809633016586304, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 6.3757200147393585, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -0.34789201617240906, |
|
"logits/rejected": -0.3619535267353058, |
|
"logps/chosen": -332.65240478515625, |
|
"logps/rejected": -348.0079650878906, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4377606511116028, |
|
"rewards/margins": 0.43048110604286194, |
|
"rewards/rejected": -0.8682417869567871, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 6.992245943787356, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.40890535712242126, |
|
"logits/rejected": -0.41852784156799316, |
|
"logps/chosen": -343.47113037109375, |
|
"logps/rejected": -352.51739501953125, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.44202518463134766, |
|
"rewards/margins": 0.35426777601242065, |
|
"rewards/rejected": -0.7962929010391235, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 5.871485349553309, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.3329642117023468, |
|
"logits/rejected": -0.33251506090164185, |
|
"logps/chosen": -415.4046936035156, |
|
"logps/rejected": -351.1783447265625, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.527837336063385, |
|
"rewards/margins": 0.2701931595802307, |
|
"rewards/rejected": -0.798030436038971, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 6.6189704018639155, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -0.4594503343105316, |
|
"logits/rejected": -0.4743649363517761, |
|
"logps/chosen": -287.46697998046875, |
|
"logps/rejected": -318.0199279785156, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5368043184280396, |
|
"rewards/margins": 0.2990504205226898, |
|
"rewards/rejected": -0.8358548283576965, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 6.440240303252386, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -0.4017508625984192, |
|
"logits/rejected": -0.3634760081768036, |
|
"logps/chosen": -301.4740295410156, |
|
"logps/rejected": -330.0491943359375, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.5480097532272339, |
|
"rewards/margins": 0.28381380438804626, |
|
"rewards/rejected": -0.8318235278129578, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 8.733595781101256, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.33765482902526855, |
|
"logits/rejected": -0.3307989537715912, |
|
"logps/chosen": -368.00457763671875, |
|
"logps/rejected": -375.5353088378906, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6871958374977112, |
|
"rewards/margins": 0.2888778746128082, |
|
"rewards/rejected": -0.9760736227035522, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 6.492977447810359, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -0.41905927658081055, |
|
"logits/rejected": -0.4047884941101074, |
|
"logps/chosen": -299.0521240234375, |
|
"logps/rejected": -338.4973449707031, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5227022767066956, |
|
"rewards/margins": 0.4564470648765564, |
|
"rewards/rejected": -0.979149341583252, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 7.072838685765256, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -0.3967028260231018, |
|
"logits/rejected": -0.3915463387966156, |
|
"logps/chosen": -393.5112609863281, |
|
"logps/rejected": -390.46771240234375, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6279814839363098, |
|
"rewards/margins": 0.20776453614234924, |
|
"rewards/rejected": -0.8357461094856262, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 7.229422542340886, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.4420618414878845, |
|
"logits/rejected": -0.47058361768722534, |
|
"logps/chosen": -320.97833251953125, |
|
"logps/rejected": -354.74896240234375, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5401324033737183, |
|
"rewards/margins": 0.39541110396385193, |
|
"rewards/rejected": -0.935543417930603, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 6.416644260001778, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.3352740705013275, |
|
"logits/rejected": -0.3355199694633484, |
|
"logps/chosen": -320.6838073730469, |
|
"logps/rejected": -349.72637939453125, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6109887361526489, |
|
"rewards/margins": 0.24212434887886047, |
|
"rewards/rejected": -0.8531131744384766, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 8.587832635723272, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -0.4223472476005554, |
|
"logits/rejected": -0.40697455406188965, |
|
"logps/chosen": -279.3757629394531, |
|
"logps/rejected": -302.53887939453125, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5754938125610352, |
|
"rewards/margins": 0.49033960700035095, |
|
"rewards/rejected": -1.065833330154419, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 8.280473447647031, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -0.308775395154953, |
|
"logits/rejected": -0.3469308018684387, |
|
"logps/chosen": -367.63006591796875, |
|
"logps/rejected": -369.24102783203125, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.54417484998703, |
|
"rewards/margins": 0.5548971891403198, |
|
"rewards/rejected": -1.0990720987319946, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 7.262952451036594, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -0.40510883927345276, |
|
"logits/rejected": -0.4301750659942627, |
|
"logps/chosen": -333.3446350097656, |
|
"logps/rejected": -367.443359375, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6565228700637817, |
|
"rewards/margins": 0.41670140624046326, |
|
"rewards/rejected": -1.0732243061065674, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 7.527915939450412, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -0.5176496505737305, |
|
"logits/rejected": -0.5337257981300354, |
|
"logps/chosen": -276.7431945800781, |
|
"logps/rejected": -333.2959899902344, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5182946920394897, |
|
"rewards/margins": 0.4113486707210541, |
|
"rewards/rejected": -0.9296433329582214, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 7.676539707780335, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.38037022948265076, |
|
"logits/rejected": -0.3775717318058014, |
|
"logps/chosen": -361.13360595703125, |
|
"logps/rejected": -350.4933166503906, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6827607154846191, |
|
"rewards/margins": 0.23238630592823029, |
|
"rewards/rejected": -0.9151470065116882, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 7.668861235099345, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -0.4016490876674652, |
|
"logits/rejected": -0.41701728105545044, |
|
"logps/chosen": -345.77093505859375, |
|
"logps/rejected": -386.59478759765625, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6805583834648132, |
|
"rewards/margins": 0.4540184438228607, |
|
"rewards/rejected": -1.1345769166946411, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 5.8948610663264756, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -0.39282411336898804, |
|
"logits/rejected": -0.40464964509010315, |
|
"logps/chosen": -409.1741638183594, |
|
"logps/rejected": -438.93896484375, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6418434381484985, |
|
"rewards/margins": 0.33645665645599365, |
|
"rewards/rejected": -0.9782999753952026, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 7.575295799966419, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -0.4237458109855652, |
|
"logits/rejected": -0.43563684821128845, |
|
"logps/chosen": -363.11199951171875, |
|
"logps/rejected": -443.242919921875, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.48444193601608276, |
|
"rewards/margins": 0.8247998356819153, |
|
"rewards/rejected": -1.309241771697998, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 8.560608469390486, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -0.37164923548698425, |
|
"logits/rejected": -0.38602423667907715, |
|
"logps/chosen": -325.58111572265625, |
|
"logps/rejected": -402.8592224121094, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6478859186172485, |
|
"rewards/margins": 0.5546708106994629, |
|
"rewards/rejected": -1.202556848526001, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 6.7270126026080295, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -0.46751460433006287, |
|
"logits/rejected": -0.5031236410140991, |
|
"logps/chosen": -311.3717346191406, |
|
"logps/rejected": -359.74786376953125, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8375557065010071, |
|
"rewards/margins": 0.37298810482025146, |
|
"rewards/rejected": -1.2105437517166138, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 8.21855435965664, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -0.42351236939430237, |
|
"logits/rejected": -0.37146827578544617, |
|
"logps/chosen": -370.0285339355469, |
|
"logps/rejected": -442.88092041015625, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8096299171447754, |
|
"rewards/margins": 0.7599128484725952, |
|
"rewards/rejected": -1.569542646408081, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 11.575192192623518, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -0.4782884120941162, |
|
"logits/rejected": -0.48071250319480896, |
|
"logps/chosen": -352.44830322265625, |
|
"logps/rejected": -357.5110778808594, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.840336799621582, |
|
"rewards/margins": 0.3332519829273224, |
|
"rewards/rejected": -1.173588752746582, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 10.038226850588464, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.4984146058559418, |
|
"logits/rejected": -0.5030771493911743, |
|
"logps/chosen": -356.491455078125, |
|
"logps/rejected": -367.9295959472656, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6749507784843445, |
|
"rewards/margins": 0.2715102434158325, |
|
"rewards/rejected": -0.946461021900177, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 9.05117258722249, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -0.4640856683254242, |
|
"logits/rejected": -0.4853819012641907, |
|
"logps/chosen": -357.955078125, |
|
"logps/rejected": -393.90478515625, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8941922187805176, |
|
"rewards/margins": 0.591671347618103, |
|
"rewards/rejected": -1.4858636856079102, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 9.681421628090424, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -0.4659281373023987, |
|
"logits/rejected": -0.5088318586349487, |
|
"logps/chosen": -329.5724182128906, |
|
"logps/rejected": -351.2308044433594, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7480869293212891, |
|
"rewards/margins": 0.4603498876094818, |
|
"rewards/rejected": -1.2084368467330933, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 9.601153198405655, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -0.47188258171081543, |
|
"logits/rejected": -0.4746321141719818, |
|
"logps/chosen": -362.03021240234375, |
|
"logps/rejected": -425.6300354003906, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.596829354763031, |
|
"rewards/margins": 0.8421809077262878, |
|
"rewards/rejected": -1.4390103816986084, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 8.338440429434868, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": -0.4840044379234314, |
|
"logits/rejected": -0.4713994860649109, |
|
"logps/chosen": -310.5907897949219, |
|
"logps/rejected": -378.32049560546875, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6725698709487915, |
|
"rewards/margins": 0.4422430992126465, |
|
"rewards/rejected": -1.114812970161438, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 7.422844726445461, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": -0.4798775613307953, |
|
"logits/rejected": -0.4771656095981598, |
|
"logps/chosen": -346.35284423828125, |
|
"logps/rejected": -404.0704040527344, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.653545081615448, |
|
"rewards/margins": 0.7137434482574463, |
|
"rewards/rejected": -1.36728835105896, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 7.498203140238108, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -0.4903596341609955, |
|
"logits/rejected": -0.4909774363040924, |
|
"logps/chosen": -333.675048828125, |
|
"logps/rejected": -356.37615966796875, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7229236960411072, |
|
"rewards/margins": 0.3018752932548523, |
|
"rewards/rejected": -1.0247989892959595, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 8.122602026965641, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": -0.46273237466812134, |
|
"logits/rejected": -0.45851221680641174, |
|
"logps/chosen": -316.8045959472656, |
|
"logps/rejected": -386.3130798339844, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6047753095626831, |
|
"rewards/margins": 0.5871869921684265, |
|
"rewards/rejected": -1.1919623613357544, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 7.941797087490691, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -0.5175309777259827, |
|
"logits/rejected": -0.5462228059768677, |
|
"logps/chosen": -322.66717529296875, |
|
"logps/rejected": -351.2758483886719, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7025772333145142, |
|
"rewards/margins": 0.17370259761810303, |
|
"rewards/rejected": -0.8762798309326172, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 11.409843882847035, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -0.5460485219955444, |
|
"logits/rejected": -0.5394560098648071, |
|
"logps/chosen": -274.5609130859375, |
|
"logps/rejected": -357.4576416015625, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.47791242599487305, |
|
"rewards/margins": 0.7380608320236206, |
|
"rewards/rejected": -1.215973138809204, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 7.234637207163524, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": -0.47741183638572693, |
|
"logits/rejected": -0.46385058760643005, |
|
"logps/chosen": -409.548583984375, |
|
"logps/rejected": -444.76080322265625, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9967001080513, |
|
"rewards/margins": 0.2708914577960968, |
|
"rewards/rejected": -1.2675915956497192, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 7.053932572600293, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": -0.4638640284538269, |
|
"logits/rejected": -0.4659477174282074, |
|
"logps/chosen": -399.1170654296875, |
|
"logps/rejected": -439.843994140625, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0153203010559082, |
|
"rewards/margins": 0.4187864661216736, |
|
"rewards/rejected": -1.434106707572937, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 8.914653285618083, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": -0.4590983986854553, |
|
"logits/rejected": -0.45087337493896484, |
|
"logps/chosen": -361.59393310546875, |
|
"logps/rejected": -440.941650390625, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7911983728408813, |
|
"rewards/margins": 0.8489359617233276, |
|
"rewards/rejected": -1.6401344537734985, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 9.061462258268621, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": -0.4630160331726074, |
|
"logits/rejected": -0.5072802305221558, |
|
"logps/chosen": -387.6048278808594, |
|
"logps/rejected": -365.64532470703125, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9304320216178894, |
|
"rewards/margins": 0.43897590041160583, |
|
"rewards/rejected": -1.3694080114364624, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 8.866753051371537, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": -0.4371200501918793, |
|
"logits/rejected": -0.43364763259887695, |
|
"logps/chosen": -409.8465881347656, |
|
"logps/rejected": -384.90234375, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7246421575546265, |
|
"rewards/margins": 0.3903957009315491, |
|
"rewards/rejected": -1.1150379180908203, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 7.704129412482537, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": -0.5143966674804688, |
|
"logits/rejected": -0.5406373143196106, |
|
"logps/chosen": -423.09014892578125, |
|
"logps/rejected": -443.0372619628906, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8278281092643738, |
|
"rewards/margins": 0.45336437225341797, |
|
"rewards/rejected": -1.2811925411224365, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 7.64309056534733, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": -0.5884715914726257, |
|
"logits/rejected": -0.5859401822090149, |
|
"logps/chosen": -379.3627624511719, |
|
"logps/rejected": -392.21710205078125, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.920864462852478, |
|
"rewards/margins": 0.23082181811332703, |
|
"rewards/rejected": -1.151686191558838, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 8.955148603646292, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": -0.5043666958808899, |
|
"logits/rejected": -0.5165797472000122, |
|
"logps/chosen": -397.373046875, |
|
"logps/rejected": -394.7057189941406, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1149301528930664, |
|
"rewards/margins": 0.4297923147678375, |
|
"rewards/rejected": -1.544722318649292, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 9.68741680379089, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": -0.5751169919967651, |
|
"logits/rejected": -0.5922902822494507, |
|
"logps/chosen": -351.0492858886719, |
|
"logps/rejected": -421.7478942871094, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8745172619819641, |
|
"rewards/margins": 0.5707848072052002, |
|
"rewards/rejected": -1.445302128791809, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 7.552967651026979, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": -0.5039399862289429, |
|
"logits/rejected": -0.49812453985214233, |
|
"logps/chosen": -357.84130859375, |
|
"logps/rejected": -476.9107360839844, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8784712553024292, |
|
"rewards/margins": 0.6386504173278809, |
|
"rewards/rejected": -1.5171215534210205, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 7.847866875926576, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": -0.5514234304428101, |
|
"logits/rejected": -0.5487276911735535, |
|
"logps/chosen": -358.778076171875, |
|
"logps/rejected": -490.60150146484375, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7840293645858765, |
|
"rewards/margins": 0.8377860188484192, |
|
"rewards/rejected": -1.6218153238296509, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 7.46939724134765, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": -0.5816242098808289, |
|
"logits/rejected": -0.5803698897361755, |
|
"logps/chosen": -391.3583679199219, |
|
"logps/rejected": -390.5106506347656, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8500774502754211, |
|
"rewards/margins": 0.6333375573158264, |
|
"rewards/rejected": -1.4834150075912476, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 9.499290159289549, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": -0.5638601779937744, |
|
"logits/rejected": -0.5747939348220825, |
|
"logps/chosen": -380.7355651855469, |
|
"logps/rejected": -426.9833984375, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9775155782699585, |
|
"rewards/margins": 0.575149416923523, |
|
"rewards/rejected": -1.5526649951934814, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 7.863264950292173, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -0.5107079148292542, |
|
"logits/rejected": -0.5302263498306274, |
|
"logps/chosen": -362.51165771484375, |
|
"logps/rejected": -357.8714294433594, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9620344042778015, |
|
"rewards/margins": 0.46561723947525024, |
|
"rewards/rejected": -1.4276517629623413, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 9.95847080901471, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": -0.5152772068977356, |
|
"logits/rejected": -0.5086151957511902, |
|
"logps/chosen": -378.06707763671875, |
|
"logps/rejected": -412.7275390625, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7655351758003235, |
|
"rewards/margins": 0.6718277335166931, |
|
"rewards/rejected": -1.4373629093170166, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 6.795589468002909, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": -0.6026760935783386, |
|
"logits/rejected": -0.605171799659729, |
|
"logps/chosen": -363.8572998046875, |
|
"logps/rejected": -459.39764404296875, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9281944036483765, |
|
"rewards/margins": 0.7557355761528015, |
|
"rewards/rejected": -1.6839300394058228, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 8.768106225235062, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": -0.4381980001926422, |
|
"logits/rejected": -0.4824441969394684, |
|
"logps/chosen": -396.9549255371094, |
|
"logps/rejected": -550.8186645507812, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1090790033340454, |
|
"rewards/margins": 0.9858474731445312, |
|
"rewards/rejected": -2.094926357269287, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 12.341914291404834, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": -0.5834716558456421, |
|
"logits/rejected": -0.5614360570907593, |
|
"logps/chosen": -384.7580261230469, |
|
"logps/rejected": -524.4951171875, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0333119630813599, |
|
"rewards/margins": 0.9544838666915894, |
|
"rewards/rejected": -1.9877955913543701, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 7.9475026844690975, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": -0.5507039427757263, |
|
"logits/rejected": -0.5546278953552246, |
|
"logps/chosen": -414.7740783691406, |
|
"logps/rejected": -445.2301330566406, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1491403579711914, |
|
"rewards/margins": 0.5966090559959412, |
|
"rewards/rejected": -1.7457494735717773, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 8.769481722876568, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": -0.6439992189407349, |
|
"logits/rejected": -0.650866687297821, |
|
"logps/chosen": -414.07147216796875, |
|
"logps/rejected": -447.4894104003906, |
|
"loss": 0.5726, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8819205164909363, |
|
"rewards/margins": 0.6305669546127319, |
|
"rewards/rejected": -1.5124876499176025, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 8.351140167515311, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": -0.5511302947998047, |
|
"logits/rejected": -0.5896275639533997, |
|
"logps/chosen": -292.4022216796875, |
|
"logps/rejected": -368.3529052734375, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9656060934066772, |
|
"rewards/margins": 0.5942641496658325, |
|
"rewards/rejected": -1.5598702430725098, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 8.859939366313267, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": -0.6408380270004272, |
|
"logits/rejected": -0.6961749792098999, |
|
"logps/chosen": -286.7148132324219, |
|
"logps/rejected": -403.5390625, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8016033172607422, |
|
"rewards/margins": 0.8615763783454895, |
|
"rewards/rejected": -1.6631797552108765, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 8.034697339520084, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": -0.6324433088302612, |
|
"logits/rejected": -0.6281362175941467, |
|
"logps/chosen": -320.374755859375, |
|
"logps/rejected": -417.0606994628906, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8844473958015442, |
|
"rewards/margins": 0.9001449346542358, |
|
"rewards/rejected": -1.7845920324325562, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 8.194750667905803, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": -0.5551676750183105, |
|
"logits/rejected": -0.5857855081558228, |
|
"logps/chosen": -384.4462585449219, |
|
"logps/rejected": -426.1817321777344, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8710935711860657, |
|
"rewards/margins": 0.703058660030365, |
|
"rewards/rejected": -1.5741522312164307, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 9.865565713899901, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": -0.6332504153251648, |
|
"logits/rejected": -0.6072624325752258, |
|
"logps/chosen": -294.1646423339844, |
|
"logps/rejected": -434.31024169921875, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0756479501724243, |
|
"rewards/margins": 0.913143515586853, |
|
"rewards/rejected": -1.9887917041778564, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 10.884390589564477, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": -0.6246575117111206, |
|
"logits/rejected": -0.6475778818130493, |
|
"logps/chosen": -372.89215087890625, |
|
"logps/rejected": -443.2525329589844, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2081364393234253, |
|
"rewards/margins": 0.7674375772476196, |
|
"rewards/rejected": -1.9755741357803345, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 7.778148160208306, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": -0.568785548210144, |
|
"logits/rejected": -0.608304500579834, |
|
"logps/chosen": -348.26702880859375, |
|
"logps/rejected": -393.5550231933594, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8094667196273804, |
|
"rewards/margins": 0.8615515828132629, |
|
"rewards/rejected": -1.671018362045288, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 8.167033695573148, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": -0.659604012966156, |
|
"logits/rejected": -0.6383468508720398, |
|
"logps/chosen": -355.82061767578125, |
|
"logps/rejected": -419.57440185546875, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7848014831542969, |
|
"rewards/margins": 0.9513505697250366, |
|
"rewards/rejected": -1.7361520528793335, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 8.531778053136817, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": -0.6497922539710999, |
|
"logits/rejected": -0.6769246459007263, |
|
"logps/chosen": -341.2376708984375, |
|
"logps/rejected": -416.5484924316406, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8200058937072754, |
|
"rewards/margins": 0.9894447326660156, |
|
"rewards/rejected": -1.8094505071640015, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 9.115422018620432, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": -0.6323500871658325, |
|
"logits/rejected": -0.6400243043899536, |
|
"logps/chosen": -378.551025390625, |
|
"logps/rejected": -431.50628662109375, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1116712093353271, |
|
"rewards/margins": 0.7979139089584351, |
|
"rewards/rejected": -1.9095849990844727, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 10.369810004009349, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": -0.7057371735572815, |
|
"logits/rejected": -0.6883876919746399, |
|
"logps/chosen": -335.6352233886719, |
|
"logps/rejected": -475.59588623046875, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9118591547012329, |
|
"rewards/margins": 0.989273190498352, |
|
"rewards/rejected": -1.9011322259902954, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 8.78559151507744, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": -0.6325684785842896, |
|
"logits/rejected": -0.6488875150680542, |
|
"logps/chosen": -321.8913879394531, |
|
"logps/rejected": -360.14776611328125, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9378086924552917, |
|
"rewards/margins": 0.4965507388114929, |
|
"rewards/rejected": -1.4343595504760742, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 7.996922099069886, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": -0.7028544545173645, |
|
"logits/rejected": -0.6892791390419006, |
|
"logps/chosen": -346.9255676269531, |
|
"logps/rejected": -447.5694274902344, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8944141268730164, |
|
"rewards/margins": 0.8071637153625488, |
|
"rewards/rejected": -1.7015777826309204, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 8.834617984631441, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": -0.6356642246246338, |
|
"logits/rejected": -0.6852391362190247, |
|
"logps/chosen": -391.93524169921875, |
|
"logps/rejected": -455.7276306152344, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8954633474349976, |
|
"rewards/margins": 0.9801417589187622, |
|
"rewards/rejected": -1.8756049871444702, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 10.623650311123297, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": -0.6282129287719727, |
|
"logits/rejected": -0.7119131088256836, |
|
"logps/chosen": -408.7945251464844, |
|
"logps/rejected": -447.96533203125, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1023727655410767, |
|
"rewards/margins": 0.621015191078186, |
|
"rewards/rejected": -1.7233880758285522, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 9.541014472218308, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": -0.6741599440574646, |
|
"logits/rejected": -0.7359042167663574, |
|
"logps/chosen": -343.3130187988281, |
|
"logps/rejected": -435.29681396484375, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8718856573104858, |
|
"rewards/margins": 1.0113645792007446, |
|
"rewards/rejected": -1.8832504749298096, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 9.400894165088678, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": -0.7107304334640503, |
|
"logits/rejected": -0.7214982509613037, |
|
"logps/chosen": -361.5565185546875, |
|
"logps/rejected": -422.3573303222656, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9224702715873718, |
|
"rewards/margins": 0.8412786722183228, |
|
"rewards/rejected": -1.7637488842010498, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 8.935621537238024, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": -0.6704460978507996, |
|
"logits/rejected": -0.7258785963058472, |
|
"logps/chosen": -425.38690185546875, |
|
"logps/rejected": -457.9751892089844, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0847482681274414, |
|
"rewards/margins": 0.9454424977302551, |
|
"rewards/rejected": -2.0301907062530518, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 9.26956553898586, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": -0.7358589768409729, |
|
"logits/rejected": -0.8038908243179321, |
|
"logps/chosen": -376.4468688964844, |
|
"logps/rejected": -392.2120361328125, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.231223464012146, |
|
"rewards/margins": 0.6490092277526855, |
|
"rewards/rejected": -1.880232572555542, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 8.919630982293898, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": -0.6560064554214478, |
|
"logits/rejected": -0.6517816781997681, |
|
"logps/chosen": -403.7508239746094, |
|
"logps/rejected": -460.4183654785156, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1548296213150024, |
|
"rewards/margins": 0.9737479090690613, |
|
"rewards/rejected": -2.128577470779419, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 10.42362251711121, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": -0.700032114982605, |
|
"logits/rejected": -0.7237606048583984, |
|
"logps/chosen": -376.8808288574219, |
|
"logps/rejected": -488.0193786621094, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9241411089897156, |
|
"rewards/margins": 0.9761545062065125, |
|
"rewards/rejected": -1.900295615196228, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 8.823907511461925, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": -0.7183485627174377, |
|
"logits/rejected": -0.7400873899459839, |
|
"logps/chosen": -375.0442810058594, |
|
"logps/rejected": -453.552978515625, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7883042693138123, |
|
"rewards/margins": 1.0131797790527344, |
|
"rewards/rejected": -1.8014838695526123, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 9.056325230872618, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": -0.7189252972602844, |
|
"logits/rejected": -0.698843777179718, |
|
"logps/chosen": -391.2516174316406, |
|
"logps/rejected": -495.9942932128906, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0153273344039917, |
|
"rewards/margins": 1.1640372276306152, |
|
"rewards/rejected": -2.1793646812438965, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 9.219431888564412, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": -0.720720648765564, |
|
"logits/rejected": -0.7227288484573364, |
|
"logps/chosen": -323.8959045410156, |
|
"logps/rejected": -411.42633056640625, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8822928667068481, |
|
"rewards/margins": 0.907570481300354, |
|
"rewards/rejected": -1.7898629903793335, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 9.219391981411931, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": -0.7076966166496277, |
|
"logits/rejected": -0.7374303340911865, |
|
"logps/chosen": -400.1958923339844, |
|
"logps/rejected": -457.45538330078125, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.270545482635498, |
|
"rewards/margins": 0.8403311967849731, |
|
"rewards/rejected": -2.1108765602111816, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 7.961461255371797, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": -0.8138734698295593, |
|
"logits/rejected": -0.8004827499389648, |
|
"logps/chosen": -375.55255126953125, |
|
"logps/rejected": -462.57452392578125, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1249182224273682, |
|
"rewards/margins": 0.7031105160713196, |
|
"rewards/rejected": -1.828028678894043, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 17.415326900164427, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": -0.7364221811294556, |
|
"logits/rejected": -0.6838979721069336, |
|
"logps/chosen": -354.063720703125, |
|
"logps/rejected": -506.49468994140625, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1404192447662354, |
|
"rewards/margins": 1.2287505865097046, |
|
"rewards/rejected": -2.3691699504852295, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 8.490137472524392, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": -0.7031981348991394, |
|
"logits/rejected": -0.7180779576301575, |
|
"logps/chosen": -381.91839599609375, |
|
"logps/rejected": -509.70391845703125, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9766290783882141, |
|
"rewards/margins": 1.4473177194595337, |
|
"rewards/rejected": -2.4239466190338135, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 9.025625908308374, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": -0.7361186742782593, |
|
"logits/rejected": -0.7329140901565552, |
|
"logps/chosen": -412.9568786621094, |
|
"logps/rejected": -463.9566955566406, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0486990213394165, |
|
"rewards/margins": 0.7866090536117554, |
|
"rewards/rejected": -1.8353080749511719, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 9.968153480159424, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": -0.685051441192627, |
|
"logits/rejected": -0.6909801959991455, |
|
"logps/chosen": -381.12054443359375, |
|
"logps/rejected": -472.12628173828125, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7610518932342529, |
|
"rewards/margins": 1.0710885524749756, |
|
"rewards/rejected": -1.832140326499939, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 10.86480858776822, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": -0.6658666133880615, |
|
"logits/rejected": -0.6583417654037476, |
|
"logps/chosen": -361.16693115234375, |
|
"logps/rejected": -498.03997802734375, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.107450246810913, |
|
"rewards/margins": 1.1987017393112183, |
|
"rewards/rejected": -2.306152105331421, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 9.37088980367882, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": -0.8107253313064575, |
|
"logits/rejected": -0.792646050453186, |
|
"logps/chosen": -417.71197509765625, |
|
"logps/rejected": -471.83721923828125, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6705278158187866, |
|
"rewards/margins": 0.6656574010848999, |
|
"rewards/rejected": -2.3361852169036865, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 13.27114348199083, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -0.7099133729934692, |
|
"logits/rejected": -0.6920545697212219, |
|
"logps/chosen": -407.37493896484375, |
|
"logps/rejected": -562.7499389648438, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6011251211166382, |
|
"rewards/margins": 1.1472370624542236, |
|
"rewards/rejected": -2.7483620643615723, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 12.106940133149287, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": -0.6640886068344116, |
|
"logits/rejected": -0.6298462748527527, |
|
"logps/chosen": -366.8916931152344, |
|
"logps/rejected": -493.5521545410156, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4960647821426392, |
|
"rewards/margins": 0.7681604623794556, |
|
"rewards/rejected": -2.2642252445220947, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 8.795405985800107, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": -0.6949892640113831, |
|
"logits/rejected": -0.7056195139884949, |
|
"logps/chosen": -351.6402282714844, |
|
"logps/rejected": -530.5101318359375, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.323290467262268, |
|
"rewards/margins": 1.3428999185562134, |
|
"rewards/rejected": -2.6661901473999023, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 9.62012729406584, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": -0.6170503497123718, |
|
"logits/rejected": -0.6489865183830261, |
|
"logps/chosen": -479.70001220703125, |
|
"logps/rejected": -550.5726318359375, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2569714784622192, |
|
"rewards/margins": 1.206158995628357, |
|
"rewards/rejected": -2.463130235671997, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 11.864875238047302, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": -0.5827468037605286, |
|
"logits/rejected": -0.58921217918396, |
|
"logps/chosen": -419.8984375, |
|
"logps/rejected": -524.0806274414062, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3793574571609497, |
|
"rewards/margins": 1.0044056177139282, |
|
"rewards/rejected": -2.383762836456299, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 10.119374435549082, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": -0.6013773679733276, |
|
"logits/rejected": -0.6345557570457458, |
|
"logps/chosen": -384.5671691894531, |
|
"logps/rejected": -508.9178771972656, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.068169355392456, |
|
"rewards/margins": 1.1706373691558838, |
|
"rewards/rejected": -2.238806962966919, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 10.659792949010905, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": -0.7585668563842773, |
|
"logits/rejected": -0.7779415249824524, |
|
"logps/chosen": -384.1558532714844, |
|
"logps/rejected": -441.42010498046875, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.535270094871521, |
|
"rewards/margins": 0.7871710062026978, |
|
"rewards/rejected": -2.3224408626556396, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 10.123601349785917, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": -0.6317464709281921, |
|
"logits/rejected": -0.6271511316299438, |
|
"logps/chosen": -450.973876953125, |
|
"logps/rejected": -524.8987426757812, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4312914609909058, |
|
"rewards/margins": 0.6362205147743225, |
|
"rewards/rejected": -2.067512035369873, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 10.190915974376356, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": -0.6999167203903198, |
|
"logits/rejected": -0.6949875354766846, |
|
"logps/chosen": -410.099853515625, |
|
"logps/rejected": -510.3067321777344, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1585137844085693, |
|
"rewards/margins": 1.2204173803329468, |
|
"rewards/rejected": -2.3789315223693848, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 10.837356546075833, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": -0.6491087675094604, |
|
"logits/rejected": -0.681550145149231, |
|
"logps/chosen": -389.38946533203125, |
|
"logps/rejected": -480.93701171875, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0993043184280396, |
|
"rewards/margins": 1.0439014434814453, |
|
"rewards/rejected": -2.1432056427001953, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 9.953869181979533, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": -0.637142539024353, |
|
"logits/rejected": -0.6458380222320557, |
|
"logps/chosen": -504.4761657714844, |
|
"logps/rejected": -539.9736938476562, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6645132303237915, |
|
"rewards/margins": 0.8288125991821289, |
|
"rewards/rejected": -2.493325710296631, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 11.690441286702658, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": -0.6920310258865356, |
|
"logits/rejected": -0.6713690161705017, |
|
"logps/chosen": -376.0501708984375, |
|
"logps/rejected": -492.35382080078125, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3901115655899048, |
|
"rewards/margins": 0.6692919731140137, |
|
"rewards/rejected": -2.059403419494629, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 9.497977320733629, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": -0.7644148468971252, |
|
"logits/rejected": -0.7483991384506226, |
|
"logps/chosen": -376.76885986328125, |
|
"logps/rejected": -497.29595947265625, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3605177402496338, |
|
"rewards/margins": 0.9189019203186035, |
|
"rewards/rejected": -2.279419422149658, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 7.866666448729003, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": -0.6804630160331726, |
|
"logits/rejected": -0.7361734509468079, |
|
"logps/chosen": -405.5047912597656, |
|
"logps/rejected": -583.3118286132812, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.131927728652954, |
|
"rewards/margins": 1.7377452850341797, |
|
"rewards/rejected": -2.869673013687134, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 9.850531494942707, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": -0.8319008946418762, |
|
"logits/rejected": -0.8503821492195129, |
|
"logps/chosen": -379.04827880859375, |
|
"logps/rejected": -460.70184326171875, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1876747608184814, |
|
"rewards/margins": 0.9654358625411987, |
|
"rewards/rejected": -2.1531107425689697, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 9.904464624440974, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": -0.7686917781829834, |
|
"logits/rejected": -0.7551219463348389, |
|
"logps/chosen": -396.49847412109375, |
|
"logps/rejected": -589.2566528320312, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.386002540588379, |
|
"rewards/margins": 1.4360870122909546, |
|
"rewards/rejected": -2.822089195251465, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 9.880008920909678, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": -0.621880829334259, |
|
"logits/rejected": -0.6349480152130127, |
|
"logps/chosen": -381.6803894042969, |
|
"logps/rejected": -495.33538818359375, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3982946872711182, |
|
"rewards/margins": 0.9522615671157837, |
|
"rewards/rejected": -2.3505563735961914, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 9.893501375511553, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": -0.7854813933372498, |
|
"logits/rejected": -0.7525703310966492, |
|
"logps/chosen": -448.2474670410156, |
|
"logps/rejected": -512.2816162109375, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.513240098953247, |
|
"rewards/margins": 1.0552384853363037, |
|
"rewards/rejected": -2.5684781074523926, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 11.71127933022176, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": -0.7181990146636963, |
|
"logits/rejected": -0.7670043706893921, |
|
"logps/chosen": -360.92669677734375, |
|
"logps/rejected": -421.00372314453125, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1134910583496094, |
|
"rewards/margins": 0.9747357368469238, |
|
"rewards/rejected": -2.088226795196533, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 10.523368241496188, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": -0.7231374979019165, |
|
"logits/rejected": -0.7205518484115601, |
|
"logps/chosen": -411.99884033203125, |
|
"logps/rejected": -491.25201416015625, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.136505126953125, |
|
"rewards/margins": 1.0490738153457642, |
|
"rewards/rejected": -2.185579299926758, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 9.516920015983152, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": -0.6618058681488037, |
|
"logits/rejected": -0.7061210870742798, |
|
"logps/chosen": -389.2023010253906, |
|
"logps/rejected": -476.97998046875, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4337702989578247, |
|
"rewards/margins": 1.021848201751709, |
|
"rewards/rejected": -2.455618381500244, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 9.385395418767361, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": -0.7980898022651672, |
|
"logits/rejected": -0.8222333788871765, |
|
"logps/chosen": -364.509521484375, |
|
"logps/rejected": -491.23175048828125, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4786386489868164, |
|
"rewards/margins": 1.1454684734344482, |
|
"rewards/rejected": -2.6241071224212646, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 10.324873859774062, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": -0.6266960501670837, |
|
"logits/rejected": -0.6783492565155029, |
|
"logps/chosen": -393.72125244140625, |
|
"logps/rejected": -528.2474365234375, |
|
"loss": 0.4626, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2447704076766968, |
|
"rewards/margins": 1.3596256971359253, |
|
"rewards/rejected": -2.604396104812622, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 11.992709858901152, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": -0.7533406615257263, |
|
"logits/rejected": -0.7324401140213013, |
|
"logps/chosen": -424.1239318847656, |
|
"logps/rejected": -602.9987182617188, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7245795726776123, |
|
"rewards/margins": 1.6655222177505493, |
|
"rewards/rejected": -3.390101671218872, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 10.879572547394886, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": -0.7729811668395996, |
|
"logits/rejected": -0.7984837293624878, |
|
"logps/chosen": -386.8953552246094, |
|
"logps/rejected": -555.2294311523438, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5044174194335938, |
|
"rewards/margins": 1.4247596263885498, |
|
"rewards/rejected": -2.9291768074035645, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 11.218700739770185, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": -0.6574599742889404, |
|
"logits/rejected": -0.6961637139320374, |
|
"logps/chosen": -434.22650146484375, |
|
"logps/rejected": -494.91607666015625, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3373010158538818, |
|
"rewards/margins": 0.9446905255317688, |
|
"rewards/rejected": -2.281991481781006, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 8.667993544637785, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": -0.7062468528747559, |
|
"logits/rejected": -0.7464607954025269, |
|
"logps/chosen": -384.9060363769531, |
|
"logps/rejected": -543.77685546875, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4272969961166382, |
|
"rewards/margins": 1.7380192279815674, |
|
"rewards/rejected": -3.165316343307495, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 8.468965922840697, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": -0.7246867418289185, |
|
"logits/rejected": -0.7426190376281738, |
|
"logps/chosen": -419.00439453125, |
|
"logps/rejected": -550.689453125, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4041268825531006, |
|
"rewards/margins": 0.8987758755683899, |
|
"rewards/rejected": -2.3029026985168457, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 9.099426609662448, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": -0.7021734118461609, |
|
"logits/rejected": -0.7220349311828613, |
|
"logps/chosen": -433.3681640625, |
|
"logps/rejected": -527.8214111328125, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4180127382278442, |
|
"rewards/margins": 0.7351676225662231, |
|
"rewards/rejected": -2.1531801223754883, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 8.435202779591016, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": -0.639062762260437, |
|
"logits/rejected": -0.6104099154472351, |
|
"logps/chosen": -357.98175048828125, |
|
"logps/rejected": -512.7109375, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9540430307388306, |
|
"rewards/margins": 1.3174707889556885, |
|
"rewards/rejected": -2.2715137004852295, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 11.561877784247002, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": -0.7886170148849487, |
|
"logits/rejected": -0.7725807428359985, |
|
"logps/chosen": -376.1795959472656, |
|
"logps/rejected": -529.3326416015625, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.531253457069397, |
|
"rewards/margins": 1.2733750343322754, |
|
"rewards/rejected": -2.804628610610962, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 10.686288534465577, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": -0.6760295629501343, |
|
"logits/rejected": -0.6590694785118103, |
|
"logps/chosen": -399.1770324707031, |
|
"logps/rejected": -554.7936401367188, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3701995611190796, |
|
"rewards/margins": 1.3539022207260132, |
|
"rewards/rejected": -2.7241015434265137, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 9.333815894429002, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": -0.759990394115448, |
|
"logits/rejected": -0.777604877948761, |
|
"logps/chosen": -457.8511657714844, |
|
"logps/rejected": -525.8488159179688, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.607553482055664, |
|
"rewards/margins": 1.082852840423584, |
|
"rewards/rejected": -2.690406322479248, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 10.014756401369624, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": -0.7365792393684387, |
|
"logits/rejected": -0.7805954217910767, |
|
"logps/chosen": -389.59808349609375, |
|
"logps/rejected": -584.1541137695312, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.296053171157837, |
|
"rewards/margins": 1.6537158489227295, |
|
"rewards/rejected": -2.9497690200805664, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 10.070273415353826, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": -0.6368024945259094, |
|
"logits/rejected": -0.6715587377548218, |
|
"logps/chosen": -434.8896484375, |
|
"logps/rejected": -555.1212158203125, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2840174436569214, |
|
"rewards/margins": 1.5494660139083862, |
|
"rewards/rejected": -2.833483934402466, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 9.237418058304717, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": -0.7330187559127808, |
|
"logits/rejected": -0.7538883686065674, |
|
"logps/chosen": -488.0216369628906, |
|
"logps/rejected": -523.1275024414062, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0400612354278564, |
|
"rewards/margins": 0.7529541850090027, |
|
"rewards/rejected": -2.793015241622925, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 10.85433893317022, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": -0.7300796508789062, |
|
"logits/rejected": -0.7841044664382935, |
|
"logps/chosen": -403.67730712890625, |
|
"logps/rejected": -509.16558837890625, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7776187658309937, |
|
"rewards/margins": 1.2936731576919556, |
|
"rewards/rejected": -3.07129168510437, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 9.714614186627546, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": -0.7365170121192932, |
|
"logits/rejected": -0.7708272337913513, |
|
"logps/chosen": -494.6708984375, |
|
"logps/rejected": -567.5706176757812, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.851345419883728, |
|
"rewards/margins": 1.2546782493591309, |
|
"rewards/rejected": -3.1060233116149902, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 7.553890272409832, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": -0.7593089938163757, |
|
"logits/rejected": -0.7586512565612793, |
|
"logps/chosen": -378.30596923828125, |
|
"logps/rejected": -479.9606018066406, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3854007720947266, |
|
"rewards/margins": 0.8888666033744812, |
|
"rewards/rejected": -2.2742676734924316, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 8.674418910529546, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": -0.6358439922332764, |
|
"logits/rejected": -0.6491922736167908, |
|
"logps/chosen": -425.07568359375, |
|
"logps/rejected": -638.0093994140625, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3985021114349365, |
|
"rewards/margins": 1.68317449092865, |
|
"rewards/rejected": -3.081676959991455, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 9.311707071153172, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": -0.7188653349876404, |
|
"logits/rejected": -0.7695431709289551, |
|
"logps/chosen": -372.1746520996094, |
|
"logps/rejected": -530.4584350585938, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1345326900482178, |
|
"rewards/margins": 1.6814968585968018, |
|
"rewards/rejected": -2.8160295486450195, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 12.609351544952487, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": -0.7544587850570679, |
|
"logits/rejected": -0.7642985582351685, |
|
"logps/chosen": -424.3369140625, |
|
"logps/rejected": -506.177978515625, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.232379674911499, |
|
"rewards/margins": 1.0791466236114502, |
|
"rewards/rejected": -2.3115265369415283, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 10.750691343448636, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": -0.6986510157585144, |
|
"logits/rejected": -0.6798522472381592, |
|
"logps/chosen": -440.4366149902344, |
|
"logps/rejected": -503.4756774902344, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.483278512954712, |
|
"rewards/margins": 1.0428184270858765, |
|
"rewards/rejected": -2.526096820831299, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 10.209697273000886, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": -0.7111358046531677, |
|
"logits/rejected": -0.6854827404022217, |
|
"logps/chosen": -484.577392578125, |
|
"logps/rejected": -609.7952880859375, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7832101583480835, |
|
"rewards/margins": 1.0032484531402588, |
|
"rewards/rejected": -2.7864584922790527, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 10.739925690898465, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": -0.6824935078620911, |
|
"logits/rejected": -0.7022455334663391, |
|
"logps/chosen": -411.59375, |
|
"logps/rejected": -576.3400268554688, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5962189435958862, |
|
"rewards/margins": 1.4433557987213135, |
|
"rewards/rejected": -3.0395750999450684, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 15.164146543864618, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": -0.7167527079582214, |
|
"logits/rejected": -0.74461829662323, |
|
"logps/chosen": -349.6092529296875, |
|
"logps/rejected": -494.6625061035156, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3855880498886108, |
|
"rewards/margins": 1.2441637516021729, |
|
"rewards/rejected": -2.629751682281494, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 12.377059840959534, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": -0.7990108132362366, |
|
"logits/rejected": -0.7646141052246094, |
|
"logps/chosen": -343.55450439453125, |
|
"logps/rejected": -517.2105712890625, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3536632061004639, |
|
"rewards/margins": 1.533830165863037, |
|
"rewards/rejected": -2.88749361038208, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 11.613569882909587, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": -0.5556444525718689, |
|
"logits/rejected": -0.5901921391487122, |
|
"logps/chosen": -421.22119140625, |
|
"logps/rejected": -584.3763427734375, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6016429662704468, |
|
"rewards/margins": 1.2570686340332031, |
|
"rewards/rejected": -2.8587117195129395, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 9.955391170965928, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": -0.658098578453064, |
|
"logits/rejected": -0.7359055876731873, |
|
"logps/chosen": -450.887451171875, |
|
"logps/rejected": -559.0867919921875, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6648181676864624, |
|
"rewards/margins": 1.1666862964630127, |
|
"rewards/rejected": -2.8315043449401855, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 11.201011724113032, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": -0.6952091455459595, |
|
"logits/rejected": -0.6913400292396545, |
|
"logps/chosen": -496.1107482910156, |
|
"logps/rejected": -632.0260009765625, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7909389734268188, |
|
"rewards/margins": 1.2750978469848633, |
|
"rewards/rejected": -3.0660367012023926, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 9.304186497298465, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": -0.6521833539009094, |
|
"logits/rejected": -0.6770762205123901, |
|
"logps/chosen": -436.94500732421875, |
|
"logps/rejected": -563.1138916015625, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.661968469619751, |
|
"rewards/margins": 1.2037100791931152, |
|
"rewards/rejected": -2.865678548812866, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 14.57039869779609, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": -0.710097074508667, |
|
"logits/rejected": -0.7277542352676392, |
|
"logps/chosen": -424.69091796875, |
|
"logps/rejected": -525.2821044921875, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3532383441925049, |
|
"rewards/margins": 0.9686284065246582, |
|
"rewards/rejected": -2.321866750717163, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 8.6232802821884, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": -0.7797672152519226, |
|
"logits/rejected": -0.8235223889350891, |
|
"logps/chosen": -389.4978332519531, |
|
"logps/rejected": -578.5491333007812, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0760996341705322, |
|
"rewards/margins": 1.9451076984405518, |
|
"rewards/rejected": -3.021207809448242, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 10.70725522123182, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": -0.5467191338539124, |
|
"logits/rejected": -0.573945164680481, |
|
"logps/chosen": -495.3836975097656, |
|
"logps/rejected": -635.158203125, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5290435552597046, |
|
"rewards/margins": 0.9863445162773132, |
|
"rewards/rejected": -2.515388011932373, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 12.578146181704888, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": -0.6268805265426636, |
|
"logits/rejected": -0.6679359674453735, |
|
"logps/chosen": -384.5243835449219, |
|
"logps/rejected": -525.4344482421875, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4070085287094116, |
|
"rewards/margins": 1.292812705039978, |
|
"rewards/rejected": -2.6998214721679688, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 9.948295850627304, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -0.635405421257019, |
|
"logits/rejected": -0.6675763726234436, |
|
"logps/chosen": -497.21466064453125, |
|
"logps/rejected": -574.4089965820312, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.751558542251587, |
|
"rewards/margins": 0.7816027402877808, |
|
"rewards/rejected": -2.533161163330078, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 10.378623964749199, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": -0.698126494884491, |
|
"logits/rejected": -0.7467767000198364, |
|
"logps/chosen": -417.2310485839844, |
|
"logps/rejected": -496.03173828125, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2678186893463135, |
|
"rewards/margins": 1.3022050857543945, |
|
"rewards/rejected": -2.570024013519287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": -0.704944372177124, |
|
"eval_logits/rejected": -0.7196417450904846, |
|
"eval_logps/chosen": -413.8262023925781, |
|
"eval_logps/rejected": -571.4524536132812, |
|
"eval_loss": 0.46414685249328613, |
|
"eval_rewards/accuracies": 0.8035714030265808, |
|
"eval_rewards/chosen": -1.4788715839385986, |
|
"eval_rewards/margins": 1.4836254119873047, |
|
"eval_rewards/rejected": -2.9624969959259033, |
|
"eval_runtime": 234.411, |
|
"eval_samples_per_second": 19.031, |
|
"eval_steps_per_second": 0.299, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 10.645209258851928, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": -0.6905248761177063, |
|
"logits/rejected": -0.681174635887146, |
|
"logps/chosen": -447.33978271484375, |
|
"logps/rejected": -541.9505004882812, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6291488409042358, |
|
"rewards/margins": 1.1908628940582275, |
|
"rewards/rejected": -2.820011615753174, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 12.91171406230696, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": -0.7098181843757629, |
|
"logits/rejected": -0.7330686450004578, |
|
"logps/chosen": -465.2738342285156, |
|
"logps/rejected": -591.0724487304688, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6661344766616821, |
|
"rewards/margins": 1.6645488739013672, |
|
"rewards/rejected": -3.330683469772339, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 11.6716276344079, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": -0.6548662185668945, |
|
"logits/rejected": -0.6267608404159546, |
|
"logps/chosen": -399.82501220703125, |
|
"logps/rejected": -510.04425048828125, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6882944107055664, |
|
"rewards/margins": 0.975196361541748, |
|
"rewards/rejected": -2.6634907722473145, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 8.381022963011278, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": -0.7615236043930054, |
|
"logits/rejected": -0.7732762098312378, |
|
"logps/chosen": -385.1295471191406, |
|
"logps/rejected": -582.0843505859375, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4341049194335938, |
|
"rewards/margins": 1.9455454349517822, |
|
"rewards/rejected": -3.379650592803955, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 14.700023421409464, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": -0.6434902548789978, |
|
"logits/rejected": -0.6391478180885315, |
|
"logps/chosen": -473.94256591796875, |
|
"logps/rejected": -567.1292114257812, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.904388666152954, |
|
"rewards/margins": 0.7521687746047974, |
|
"rewards/rejected": -2.656557559967041, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 9.948082556212318, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": -0.8040687441825867, |
|
"logits/rejected": -0.750956118106842, |
|
"logps/chosen": -404.974365234375, |
|
"logps/rejected": -571.2884521484375, |
|
"loss": 0.4606, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5249295234680176, |
|
"rewards/margins": 1.4364125728607178, |
|
"rewards/rejected": -2.9613418579101562, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 10.22438537397434, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": -0.7241095304489136, |
|
"logits/rejected": -0.6997084021568298, |
|
"logps/chosen": -448.89300537109375, |
|
"logps/rejected": -555.0241088867188, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5664496421813965, |
|
"rewards/margins": 1.0874204635620117, |
|
"rewards/rejected": -2.653870105743408, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 8.109581248601415, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": -0.641141414642334, |
|
"logits/rejected": -0.7013910412788391, |
|
"logps/chosen": -510.6063537597656, |
|
"logps/rejected": -640.6654052734375, |
|
"loss": 0.4353, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8512099981307983, |
|
"rewards/margins": 1.588141679763794, |
|
"rewards/rejected": -3.4393515586853027, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 12.084926653784903, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": -0.7068333625793457, |
|
"logits/rejected": -0.7458164691925049, |
|
"logps/chosen": -449.1051330566406, |
|
"logps/rejected": -605.0886840820312, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4715017080307007, |
|
"rewards/margins": 1.2597967386245728, |
|
"rewards/rejected": -2.7312982082366943, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 10.473433486601849, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": -0.672719419002533, |
|
"logits/rejected": -0.6805760264396667, |
|
"logps/chosen": -399.2666931152344, |
|
"logps/rejected": -524.7140502929688, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.396222472190857, |
|
"rewards/margins": 1.1300289630889893, |
|
"rewards/rejected": -2.5262515544891357, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 8.259116612360256, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": -0.7717374563217163, |
|
"logits/rejected": -0.7531148195266724, |
|
"logps/chosen": -406.16351318359375, |
|
"logps/rejected": -557.4243774414062, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2984120845794678, |
|
"rewards/margins": 1.5021198987960815, |
|
"rewards/rejected": -2.800532102584839, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 12.480170050902744, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": -0.6736984252929688, |
|
"logits/rejected": -0.6641879081726074, |
|
"logps/chosen": -438.26092529296875, |
|
"logps/rejected": -534.37890625, |
|
"loss": 0.4671, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.752355933189392, |
|
"rewards/margins": 0.9608189463615417, |
|
"rewards/rejected": -2.713174819946289, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 13.113194375488565, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": -0.7215433120727539, |
|
"logits/rejected": -0.7145394086837769, |
|
"logps/chosen": -429.87689208984375, |
|
"logps/rejected": -588.1317138671875, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8144502639770508, |
|
"rewards/margins": 1.295245885848999, |
|
"rewards/rejected": -3.10969614982605, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 10.389514519832314, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": -0.7280897498130798, |
|
"logits/rejected": -0.7563216686248779, |
|
"logps/chosen": -451.57037353515625, |
|
"logps/rejected": -583.136962890625, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9117393493652344, |
|
"rewards/margins": 1.2607026100158691, |
|
"rewards/rejected": -3.1724419593811035, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 9.893178585305161, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": -0.7491916418075562, |
|
"logits/rejected": -0.7631763815879822, |
|
"logps/chosen": -454.49542236328125, |
|
"logps/rejected": -534.6981201171875, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7530666589736938, |
|
"rewards/margins": 1.1452219486236572, |
|
"rewards/rejected": -2.8982887268066406, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 10.78966013478198, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": -0.7122198343276978, |
|
"logits/rejected": -0.7713319063186646, |
|
"logps/chosen": -410.6304626464844, |
|
"logps/rejected": -599.1976928710938, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6474449634552002, |
|
"rewards/margins": 1.9179481267929077, |
|
"rewards/rejected": -3.5653927326202393, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 7.331401403742752, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": -0.672527551651001, |
|
"logits/rejected": -0.6674192547798157, |
|
"logps/chosen": -429.77374267578125, |
|
"logps/rejected": -540.6340942382812, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4047420024871826, |
|
"rewards/margins": 0.8396526575088501, |
|
"rewards/rejected": -2.2443947792053223, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 11.47735860978489, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": -0.7144309282302856, |
|
"logits/rejected": -0.7627060413360596, |
|
"logps/chosen": -419.7461853027344, |
|
"logps/rejected": -631.6406860351562, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7272449731826782, |
|
"rewards/margins": 1.802207589149475, |
|
"rewards/rejected": -3.529452085494995, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 12.66627984144212, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": -0.7830231785774231, |
|
"logits/rejected": -0.7730289697647095, |
|
"logps/chosen": -392.6502380371094, |
|
"logps/rejected": -612.9030151367188, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.296020746231079, |
|
"rewards/margins": 2.1511874198913574, |
|
"rewards/rejected": -3.4472084045410156, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 10.665323165924152, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": -0.7580839395523071, |
|
"logits/rejected": -0.778628945350647, |
|
"logps/chosen": -419.266845703125, |
|
"logps/rejected": -601.0218505859375, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.676492691040039, |
|
"rewards/margins": 1.9639472961425781, |
|
"rewards/rejected": -3.640439987182617, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 15.168171864262504, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -0.8083688616752625, |
|
"logits/rejected": -0.835811972618103, |
|
"logps/chosen": -480.6859436035156, |
|
"logps/rejected": -533.2108764648438, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.826184868812561, |
|
"rewards/margins": 0.8893225789070129, |
|
"rewards/rejected": -2.7155075073242188, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 13.565039831991228, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": -0.6329632997512817, |
|
"logits/rejected": -0.658043384552002, |
|
"logps/chosen": -456.0284118652344, |
|
"logps/rejected": -526.2221069335938, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.818780541419983, |
|
"rewards/margins": 0.5884894132614136, |
|
"rewards/rejected": -2.4072699546813965, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 11.227729437674412, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": -0.7310200333595276, |
|
"logits/rejected": -0.7426483035087585, |
|
"logps/chosen": -385.03326416015625, |
|
"logps/rejected": -597.54541015625, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4363114833831787, |
|
"rewards/margins": 1.9664011001586914, |
|
"rewards/rejected": -3.402712345123291, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 12.301180793094087, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": -0.756328284740448, |
|
"logits/rejected": -0.731390118598938, |
|
"logps/chosen": -422.31524658203125, |
|
"logps/rejected": -570.0447998046875, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5025454759597778, |
|
"rewards/margins": 1.4451904296875, |
|
"rewards/rejected": -2.9477362632751465, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 11.203287249603568, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": -0.7012640237808228, |
|
"logits/rejected": -0.734104335308075, |
|
"logps/chosen": -366.77569580078125, |
|
"logps/rejected": -487.832763671875, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3068386316299438, |
|
"rewards/margins": 1.2463617324829102, |
|
"rewards/rejected": -2.5532002449035645, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 11.401920927705493, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": -0.7200027704238892, |
|
"logits/rejected": -0.7047854661941528, |
|
"logps/chosen": -408.1886291503906, |
|
"logps/rejected": -583.43896484375, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5970829725265503, |
|
"rewards/margins": 1.6908365488052368, |
|
"rewards/rejected": -3.2879199981689453, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 13.14986787185228, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": -0.7282342910766602, |
|
"logits/rejected": -0.7267628312110901, |
|
"logps/chosen": -395.9429016113281, |
|
"logps/rejected": -668.2164916992188, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5156352519989014, |
|
"rewards/margins": 2.5282320976257324, |
|
"rewards/rejected": -4.043867588043213, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 9.395110944586639, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": -0.6741994619369507, |
|
"logits/rejected": -0.6505922675132751, |
|
"logps/chosen": -426.7835388183594, |
|
"logps/rejected": -573.0272827148438, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0611157417297363, |
|
"rewards/margins": 1.2577565908432007, |
|
"rewards/rejected": -3.3188719749450684, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 13.157814593299042, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": -0.727673351764679, |
|
"logits/rejected": -0.7585957050323486, |
|
"logps/chosen": -428.2842712402344, |
|
"logps/rejected": -550.08642578125, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9122216701507568, |
|
"rewards/margins": 1.1558340787887573, |
|
"rewards/rejected": -3.0680556297302246, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 11.84384992024014, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": -0.6177406311035156, |
|
"logits/rejected": -0.5921697616577148, |
|
"logps/chosen": -425.8565368652344, |
|
"logps/rejected": -591.1513061523438, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9242675304412842, |
|
"rewards/margins": 1.2209120988845825, |
|
"rewards/rejected": -3.1451797485351562, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 10.558621692008028, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": -0.6363598108291626, |
|
"logits/rejected": -0.7210627794265747, |
|
"logps/chosen": -480.21820068359375, |
|
"logps/rejected": -575.8844604492188, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.584695816040039, |
|
"rewards/margins": 1.3920116424560547, |
|
"rewards/rejected": -2.9767074584960938, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 13.809223972324538, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": -0.7176483869552612, |
|
"logits/rejected": -0.7233623266220093, |
|
"logps/chosen": -392.2666931152344, |
|
"logps/rejected": -544.047607421875, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4483287334442139, |
|
"rewards/margins": 0.9412357211112976, |
|
"rewards/rejected": -2.3895645141601562, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 10.320795589655923, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": -0.5995772480964661, |
|
"logits/rejected": -0.665002703666687, |
|
"logps/chosen": -429.8282165527344, |
|
"logps/rejected": -476.87384033203125, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5427360534667969, |
|
"rewards/margins": 1.0240482091903687, |
|
"rewards/rejected": -2.566784143447876, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 11.374067564936935, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": -0.6323488354682922, |
|
"logits/rejected": -0.6464725732803345, |
|
"logps/chosen": -405.65814208984375, |
|
"logps/rejected": -461.2422790527344, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.6847827434539795, |
|
"rewards/margins": 0.6106036305427551, |
|
"rewards/rejected": -2.29538631439209, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 11.798719511944995, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": -0.7201340198516846, |
|
"logits/rejected": -0.7137752771377563, |
|
"logps/chosen": -407.8214416503906, |
|
"logps/rejected": -557.0501708984375, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9475882053375244, |
|
"rewards/margins": 1.3403925895690918, |
|
"rewards/rejected": -3.287980556488037, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 18.10704498970427, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": -0.7477551698684692, |
|
"logits/rejected": -0.7495108842849731, |
|
"logps/chosen": -487.01788330078125, |
|
"logps/rejected": -621.1641235351562, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7702643871307373, |
|
"rewards/margins": 1.6865708827972412, |
|
"rewards/rejected": -3.4568352699279785, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 9.023431566315685, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": -0.6413623690605164, |
|
"logits/rejected": -0.6585931777954102, |
|
"logps/chosen": -468.298095703125, |
|
"logps/rejected": -593.5641479492188, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6977428197860718, |
|
"rewards/margins": 1.104875087738037, |
|
"rewards/rejected": -2.8026180267333984, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 9.106685136975523, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": -0.6131690740585327, |
|
"logits/rejected": -0.705342710018158, |
|
"logps/chosen": -403.64483642578125, |
|
"logps/rejected": -455.7774353027344, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.58620285987854, |
|
"rewards/margins": 1.1645066738128662, |
|
"rewards/rejected": -2.7507095336914062, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 12.485032451040595, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": -0.5965815186500549, |
|
"logits/rejected": -0.6691153049468994, |
|
"logps/chosen": -375.786376953125, |
|
"logps/rejected": -546.0467529296875, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4897288084030151, |
|
"rewards/margins": 1.8273935317993164, |
|
"rewards/rejected": -3.3171226978302, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 9.104242742336105, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": -0.6820736527442932, |
|
"logits/rejected": -0.6869875192642212, |
|
"logps/chosen": -366.31060791015625, |
|
"logps/rejected": -517.474365234375, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6863653659820557, |
|
"rewards/margins": 1.5893397331237793, |
|
"rewards/rejected": -3.275705337524414, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 15.565842828873247, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": -0.6215115189552307, |
|
"logits/rejected": -0.5873704552650452, |
|
"logps/chosen": -400.32440185546875, |
|
"logps/rejected": -548.8357543945312, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3634350299835205, |
|
"rewards/margins": 1.2007476091384888, |
|
"rewards/rejected": -2.564182758331299, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 10.548601465166293, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": -0.6729727387428284, |
|
"logits/rejected": -0.6920270919799805, |
|
"logps/chosen": -388.9903564453125, |
|
"logps/rejected": -711.9734497070312, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8660037517547607, |
|
"rewards/margins": 3.018165111541748, |
|
"rewards/rejected": -4.884169101715088, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 9.156657974638184, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": -0.7091829180717468, |
|
"logits/rejected": -0.7354472875595093, |
|
"logps/chosen": -383.76605224609375, |
|
"logps/rejected": -601.3203735351562, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6603578329086304, |
|
"rewards/margins": 1.862908959388733, |
|
"rewards/rejected": -3.523266553878784, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 13.232575343054773, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": -0.7067408561706543, |
|
"logits/rejected": -0.7342425584793091, |
|
"logps/chosen": -383.7411804199219, |
|
"logps/rejected": -589.1730346679688, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7060611248016357, |
|
"rewards/margins": 1.9039154052734375, |
|
"rewards/rejected": -3.609976291656494, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 9.071189608456551, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": -0.6443785429000854, |
|
"logits/rejected": -0.6465337872505188, |
|
"logps/chosen": -449.90753173828125, |
|
"logps/rejected": -567.1222534179688, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4745603799819946, |
|
"rewards/margins": 1.4671189785003662, |
|
"rewards/rejected": -2.941678762435913, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 10.300854259670189, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": -0.7359960675239563, |
|
"logits/rejected": -0.72270667552948, |
|
"logps/chosen": -403.669189453125, |
|
"logps/rejected": -606.3935546875, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.6255872249603271, |
|
"rewards/margins": 1.5683765411376953, |
|
"rewards/rejected": -3.1939637660980225, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 12.922478808451812, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": -0.6276537775993347, |
|
"logits/rejected": -0.5880897045135498, |
|
"logps/chosen": -394.9385986328125, |
|
"logps/rejected": -579.1409912109375, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5723788738250732, |
|
"rewards/margins": 1.3019646406173706, |
|
"rewards/rejected": -2.8743433952331543, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 11.565229978778513, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": -0.6361690163612366, |
|
"logits/rejected": -0.6227170825004578, |
|
"logps/chosen": -399.3015441894531, |
|
"logps/rejected": -556.216064453125, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5578765869140625, |
|
"rewards/margins": 1.3979572057724, |
|
"rewards/rejected": -2.955833911895752, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 13.512372450905238, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": -0.7200502157211304, |
|
"logits/rejected": -0.7266454696655273, |
|
"logps/chosen": -463.3838806152344, |
|
"logps/rejected": -626.7127075195312, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8044846057891846, |
|
"rewards/margins": 1.658591628074646, |
|
"rewards/rejected": -3.46307635307312, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 10.955905269308658, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": -0.560949444770813, |
|
"logits/rejected": -0.6333897113800049, |
|
"logps/chosen": -404.170654296875, |
|
"logps/rejected": -486.53631591796875, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.536853551864624, |
|
"rewards/margins": 1.2563847303390503, |
|
"rewards/rejected": -2.7932381629943848, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 11.614988822427518, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": -0.6411922574043274, |
|
"logits/rejected": -0.7291480898857117, |
|
"logps/chosen": -442.89910888671875, |
|
"logps/rejected": -545.2685546875, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7440217733383179, |
|
"rewards/margins": 1.5008093118667603, |
|
"rewards/rejected": -3.24483060836792, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 12.607898741358827, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": -0.7110682725906372, |
|
"logits/rejected": -0.7056074142456055, |
|
"logps/chosen": -416.0528259277344, |
|
"logps/rejected": -573.2369384765625, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9202907085418701, |
|
"rewards/margins": 1.4374290704727173, |
|
"rewards/rejected": -3.357719898223877, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 10.178927094903639, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": -0.6290922164916992, |
|
"logits/rejected": -0.5829756259918213, |
|
"logps/chosen": -434.39288330078125, |
|
"logps/rejected": -613.2723999023438, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4553261995315552, |
|
"rewards/margins": 1.7923997640609741, |
|
"rewards/rejected": -3.2477259635925293, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 13.408248580673138, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": -0.6658229231834412, |
|
"logits/rejected": -0.7100438475608826, |
|
"logps/chosen": -389.3979797363281, |
|
"logps/rejected": -438.6206970214844, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6204715967178345, |
|
"rewards/margins": 0.9446170926094055, |
|
"rewards/rejected": -2.5650887489318848, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 12.880050575259938, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": -0.6123485565185547, |
|
"logits/rejected": -0.6827987432479858, |
|
"logps/chosen": -440.87896728515625, |
|
"logps/rejected": -584.0040283203125, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.731406569480896, |
|
"rewards/margins": 1.396422028541565, |
|
"rewards/rejected": -3.127828359603882, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 10.788734052701793, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": -0.6031758785247803, |
|
"logits/rejected": -0.648442268371582, |
|
"logps/chosen": -400.7000732421875, |
|
"logps/rejected": -660.7904052734375, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2926169633865356, |
|
"rewards/margins": 2.534430980682373, |
|
"rewards/rejected": -3.827047824859619, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 10.567224853092739, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": -0.7051092386245728, |
|
"logits/rejected": -0.7292466759681702, |
|
"logps/chosen": -377.7469787597656, |
|
"logps/rejected": -552.3789672851562, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5156134366989136, |
|
"rewards/margins": 1.6158390045166016, |
|
"rewards/rejected": -3.1314525604248047, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 9.777520636965628, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": -0.6945359110832214, |
|
"logits/rejected": -0.6680124998092651, |
|
"logps/chosen": -410.309326171875, |
|
"logps/rejected": -502.10760498046875, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4556920528411865, |
|
"rewards/margins": 1.0640310049057007, |
|
"rewards/rejected": -2.5197231769561768, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 11.680219063781859, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": -0.6178931593894958, |
|
"logits/rejected": -0.6295452117919922, |
|
"logps/chosen": -406.61749267578125, |
|
"logps/rejected": -649.4032592773438, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6173099279403687, |
|
"rewards/margins": 1.7425906658172607, |
|
"rewards/rejected": -3.359900712966919, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 8.702965805638529, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": -0.7019624710083008, |
|
"logits/rejected": -0.6865247488021851, |
|
"logps/chosen": -422.4203186035156, |
|
"logps/rejected": -627.749755859375, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.275315761566162, |
|
"rewards/margins": 2.0367074012756348, |
|
"rewards/rejected": -3.312023639678955, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 10.5146873413561, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": -0.6342155933380127, |
|
"logits/rejected": -0.5797609686851501, |
|
"logps/chosen": -340.15179443359375, |
|
"logps/rejected": -614.032470703125, |
|
"loss": 0.449, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4568018913269043, |
|
"rewards/margins": 2.3389487266540527, |
|
"rewards/rejected": -3.795750379562378, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 10.737478950061426, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": -0.7070366740226746, |
|
"logits/rejected": -0.717880129814148, |
|
"logps/chosen": -418.17706298828125, |
|
"logps/rejected": -548.4085083007812, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8227876424789429, |
|
"rewards/margins": 1.3973190784454346, |
|
"rewards/rejected": -3.220106840133667, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 9.684713099622218, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": -0.7213168740272522, |
|
"logits/rejected": -0.7527577877044678, |
|
"logps/chosen": -404.2679748535156, |
|
"logps/rejected": -679.9412841796875, |
|
"loss": 0.4013, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9124399423599243, |
|
"rewards/margins": 2.531831741333008, |
|
"rewards/rejected": -4.444272041320801, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 11.280247842445823, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": -0.6257885098457336, |
|
"logits/rejected": -0.5951186418533325, |
|
"logps/chosen": -457.1598205566406, |
|
"logps/rejected": -517.6695556640625, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6708452701568604, |
|
"rewards/margins": 1.033866286277771, |
|
"rewards/rejected": -2.704711437225342, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 14.057616949353399, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": -0.6142539381980896, |
|
"logits/rejected": -0.6413928270339966, |
|
"logps/chosen": -473.72979736328125, |
|
"logps/rejected": -600.7677001953125, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7104568481445312, |
|
"rewards/margins": 1.6506239175796509, |
|
"rewards/rejected": -3.3610808849334717, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 10.007284775964992, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": -0.768576979637146, |
|
"logits/rejected": -0.7556449174880981, |
|
"logps/chosen": -412.39984130859375, |
|
"logps/rejected": -669.0284423828125, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6826118230819702, |
|
"rewards/margins": 2.168964147567749, |
|
"rewards/rejected": -3.8515758514404297, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 8.96835406227991, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": -0.583341658115387, |
|
"logits/rejected": -0.6630910038948059, |
|
"logps/chosen": -434.9918518066406, |
|
"logps/rejected": -545.5853271484375, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7140676975250244, |
|
"rewards/margins": 1.6664565801620483, |
|
"rewards/rejected": -3.380524158477783, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 10.326630103018084, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": -0.6508952379226685, |
|
"logits/rejected": -0.6744917631149292, |
|
"logps/chosen": -381.95367431640625, |
|
"logps/rejected": -674.4815673828125, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5953381061553955, |
|
"rewards/margins": 2.39109468460083, |
|
"rewards/rejected": -3.9864323139190674, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 12.773425150047508, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": -0.7672047019004822, |
|
"logits/rejected": -0.7807837724685669, |
|
"logps/chosen": -420.0484313964844, |
|
"logps/rejected": -511.435302734375, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6788091659545898, |
|
"rewards/margins": 1.1816326379776, |
|
"rewards/rejected": -2.8604416847229004, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 8.097344363797557, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": -0.6450155973434448, |
|
"logits/rejected": -0.6305941343307495, |
|
"logps/chosen": -443.16607666015625, |
|
"logps/rejected": -679.9429321289062, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5886855125427246, |
|
"rewards/margins": 1.9584792852401733, |
|
"rewards/rejected": -3.5471644401550293, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 17.48613369684693, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": -0.6135013103485107, |
|
"logits/rejected": -0.6035945415496826, |
|
"logps/chosen": -451.90380859375, |
|
"logps/rejected": -557.508544921875, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8703639507293701, |
|
"rewards/margins": 1.1114423274993896, |
|
"rewards/rejected": -2.9818062782287598, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 10.490682463258194, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": -0.6777503490447998, |
|
"logits/rejected": -0.6797904968261719, |
|
"logps/chosen": -436.258056640625, |
|
"logps/rejected": -608.3687744140625, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.422296166419983, |
|
"rewards/margins": 1.5763972997665405, |
|
"rewards/rejected": -2.9986937046051025, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 12.224882835993649, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": -0.6939619779586792, |
|
"logits/rejected": -0.6975654363632202, |
|
"logps/chosen": -427.3936462402344, |
|
"logps/rejected": -593.472412109375, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.530230164527893, |
|
"rewards/margins": 1.6595938205718994, |
|
"rewards/rejected": -3.189823865890503, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 10.153894922340301, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": -0.6700283288955688, |
|
"logits/rejected": -0.6896187663078308, |
|
"logps/chosen": -357.0517578125, |
|
"logps/rejected": -522.7760620117188, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.513653039932251, |
|
"rewards/margins": 1.3927968740463257, |
|
"rewards/rejected": -2.906449794769287, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 8.373116906245645, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": -0.60322105884552, |
|
"logits/rejected": -0.6336368322372437, |
|
"logps/chosen": -365.42059326171875, |
|
"logps/rejected": -530.8365478515625, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.241115927696228, |
|
"rewards/margins": 1.4525740146636963, |
|
"rewards/rejected": -2.693690299987793, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 13.542426256092975, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": -0.6148853302001953, |
|
"logits/rejected": -0.6405919194221497, |
|
"logps/chosen": -476.22589111328125, |
|
"logps/rejected": -573.9639282226562, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4998128414154053, |
|
"rewards/margins": 1.6009342670440674, |
|
"rewards/rejected": -3.1007466316223145, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 11.419777174024256, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": -0.6193658113479614, |
|
"logits/rejected": -0.6027348637580872, |
|
"logps/chosen": -428.71490478515625, |
|
"logps/rejected": -535.6744995117188, |
|
"loss": 0.4509, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.558387041091919, |
|
"rewards/margins": 1.1832726001739502, |
|
"rewards/rejected": -2.741659641265869, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 12.984704637723206, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": -0.6714409589767456, |
|
"logits/rejected": -0.7126461267471313, |
|
"logps/chosen": -428.8514099121094, |
|
"logps/rejected": -529.1953735351562, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5292034149169922, |
|
"rewards/margins": 1.5168203115463257, |
|
"rewards/rejected": -3.0460238456726074, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 14.138835125115458, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": -0.6455475687980652, |
|
"logits/rejected": -0.642737090587616, |
|
"logps/chosen": -417.8585510253906, |
|
"logps/rejected": -533.5113525390625, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2823712825775146, |
|
"rewards/margins": 1.770923376083374, |
|
"rewards/rejected": -3.0532946586608887, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 15.610740173237787, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": -0.6172278523445129, |
|
"logits/rejected": -0.6776692271232605, |
|
"logps/chosen": -418.36578369140625, |
|
"logps/rejected": -581.3909912109375, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4995958805084229, |
|
"rewards/margins": 1.887927770614624, |
|
"rewards/rejected": -3.387523651123047, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 10.722965313997076, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": -0.6301898956298828, |
|
"logits/rejected": -0.6822000741958618, |
|
"logps/chosen": -449.79840087890625, |
|
"logps/rejected": -575.036865234375, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.575657606124878, |
|
"rewards/margins": 1.3444297313690186, |
|
"rewards/rejected": -2.9200873374938965, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 10.427262900663766, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": -0.6417717933654785, |
|
"logits/rejected": -0.6694071888923645, |
|
"logps/chosen": -496.41064453125, |
|
"logps/rejected": -595.4664306640625, |
|
"loss": 0.468, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6847069263458252, |
|
"rewards/margins": 1.2323038578033447, |
|
"rewards/rejected": -2.91701078414917, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 11.587445881578779, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": -0.6943923234939575, |
|
"logits/rejected": -0.6795603036880493, |
|
"logps/chosen": -427.15155029296875, |
|
"logps/rejected": -583.9935913085938, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.664332628250122, |
|
"rewards/margins": 1.5579102039337158, |
|
"rewards/rejected": -3.222242832183838, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 8.40191068876138, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": -0.6684115529060364, |
|
"logits/rejected": -0.6200501918792725, |
|
"logps/chosen": -355.61358642578125, |
|
"logps/rejected": -538.16650390625, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4830284118652344, |
|
"rewards/margins": 1.320604681968689, |
|
"rewards/rejected": -2.803633213043213, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 9.705292210107526, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": -0.6299320459365845, |
|
"logits/rejected": -0.6841608285903931, |
|
"logps/chosen": -473.47222900390625, |
|
"logps/rejected": -577.1047973632812, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.683821439743042, |
|
"rewards/margins": 1.3992817401885986, |
|
"rewards/rejected": -3.0831027030944824, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 10.377222974537387, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": -0.6160681247711182, |
|
"logits/rejected": -0.5785273313522339, |
|
"logps/chosen": -480.8841857910156, |
|
"logps/rejected": -627.6201171875, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7052888870239258, |
|
"rewards/margins": 1.3676353693008423, |
|
"rewards/rejected": -3.0729241371154785, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 9.428651099395465, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": -0.6848248243331909, |
|
"logits/rejected": -0.6872170567512512, |
|
"logps/chosen": -386.19873046875, |
|
"logps/rejected": -546.9776000976562, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8132606744766235, |
|
"rewards/margins": 1.4182673692703247, |
|
"rewards/rejected": -3.2315280437469482, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 13.945186305417156, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": -0.6140165328979492, |
|
"logits/rejected": -0.6878429651260376, |
|
"logps/chosen": -531.7335205078125, |
|
"logps/rejected": -573.6845092773438, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3029494285583496, |
|
"rewards/margins": 0.8266263008117676, |
|
"rewards/rejected": -3.129575490951538, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 12.851850092870997, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": -0.7798065543174744, |
|
"logits/rejected": -0.7741595506668091, |
|
"logps/chosen": -430.0048828125, |
|
"logps/rejected": -566.271240234375, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7590996026992798, |
|
"rewards/margins": 1.2983884811401367, |
|
"rewards/rejected": -3.057487964630127, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 12.345371556886562, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": -0.5772908329963684, |
|
"logits/rejected": -0.6081336140632629, |
|
"logps/chosen": -378.8913879394531, |
|
"logps/rejected": -535.4404907226562, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5490992069244385, |
|
"rewards/margins": 1.543939232826233, |
|
"rewards/rejected": -3.093038558959961, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 14.289160494822747, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": -0.6734031438827515, |
|
"logits/rejected": -0.6486900448799133, |
|
"logps/chosen": -372.4572448730469, |
|
"logps/rejected": -487.9190979003906, |
|
"loss": 0.461, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6701858043670654, |
|
"rewards/margins": 1.1198476552963257, |
|
"rewards/rejected": -2.7900338172912598, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 12.042269042020521, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": -0.6462276577949524, |
|
"logits/rejected": -0.660896897315979, |
|
"logps/chosen": -339.7335510253906, |
|
"logps/rejected": -547.5769653320312, |
|
"loss": 0.4327, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.608481764793396, |
|
"rewards/margins": 1.6944421529769897, |
|
"rewards/rejected": -3.3029239177703857, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 15.19569101654986, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": -0.6471028923988342, |
|
"logits/rejected": -0.6764336824417114, |
|
"logps/chosen": -425.2708435058594, |
|
"logps/rejected": -565.1405639648438, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5919914245605469, |
|
"rewards/margins": 1.4726965427398682, |
|
"rewards/rejected": -3.064688205718994, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 13.971571066599896, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": -0.6739888191223145, |
|
"logits/rejected": -0.6833500266075134, |
|
"logps/chosen": -457.6881408691406, |
|
"logps/rejected": -645.4636840820312, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7702935934066772, |
|
"rewards/margins": 1.9555785655975342, |
|
"rewards/rejected": -3.725872039794922, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 11.688443402207835, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": -0.6156803369522095, |
|
"logits/rejected": -0.6335949897766113, |
|
"logps/chosen": -491.3165588378906, |
|
"logps/rejected": -634.9505004882812, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7908356189727783, |
|
"rewards/margins": 1.4069457054138184, |
|
"rewards/rejected": -3.1977813243865967, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 10.926862919127126, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": -0.6044400334358215, |
|
"logits/rejected": -0.6133986711502075, |
|
"logps/chosen": -445.9336853027344, |
|
"logps/rejected": -599.2249755859375, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5493725538253784, |
|
"rewards/margins": 1.346935749053955, |
|
"rewards/rejected": -2.896308183670044, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 10.901886694528228, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": -0.6457855105400085, |
|
"logits/rejected": -0.6682008504867554, |
|
"logps/chosen": -491.98797607421875, |
|
"logps/rejected": -631.3724975585938, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8513435125350952, |
|
"rewards/margins": 1.5765281915664673, |
|
"rewards/rejected": -3.4278717041015625, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 10.052694308856596, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": -0.6152561902999878, |
|
"logits/rejected": -0.6195570230484009, |
|
"logps/chosen": -467.3623962402344, |
|
"logps/rejected": -649.93994140625, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5257104635238647, |
|
"rewards/margins": 1.9131603240966797, |
|
"rewards/rejected": -3.438870906829834, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 13.617310196785702, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": -0.652159571647644, |
|
"logits/rejected": -0.669161856174469, |
|
"logps/chosen": -421.92620849609375, |
|
"logps/rejected": -561.7144775390625, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6268739700317383, |
|
"rewards/margins": 1.6520655155181885, |
|
"rewards/rejected": -3.278939723968506, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 9.598022401827114, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": -0.6599806547164917, |
|
"logits/rejected": -0.6797146797180176, |
|
"logps/chosen": -446.956787109375, |
|
"logps/rejected": -640.7252807617188, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7980464696884155, |
|
"rewards/margins": 1.6290537118911743, |
|
"rewards/rejected": -3.427100419998169, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 10.802854623400075, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": -0.5600841045379639, |
|
"logits/rejected": -0.5877747535705566, |
|
"logps/chosen": -460.4256286621094, |
|
"logps/rejected": -603.6989135742188, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7123295068740845, |
|
"rewards/margins": 1.7583147287368774, |
|
"rewards/rejected": -3.470644474029541, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 10.40502367000872, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": -0.6589699387550354, |
|
"logits/rejected": -0.6785061955451965, |
|
"logps/chosen": -431.74407958984375, |
|
"logps/rejected": -569.8807373046875, |
|
"loss": 0.4092, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.640982985496521, |
|
"rewards/margins": 1.5366647243499756, |
|
"rewards/rejected": -3.177647829055786, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 13.119977378434658, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": -0.5950068831443787, |
|
"logits/rejected": -0.6270568370819092, |
|
"logps/chosen": -422.1985778808594, |
|
"logps/rejected": -560.9178466796875, |
|
"loss": 0.4589, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6238855123519897, |
|
"rewards/margins": 1.5490210056304932, |
|
"rewards/rejected": -3.1729063987731934, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 12.65683765122561, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": -0.6144478917121887, |
|
"logits/rejected": -0.673326849937439, |
|
"logps/chosen": -397.64007568359375, |
|
"logps/rejected": -620.5786743164062, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.721166968345642, |
|
"rewards/margins": 2.2845423221588135, |
|
"rewards/rejected": -4.005709648132324, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 12.041290560966866, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": -0.550287663936615, |
|
"logits/rejected": -0.5756568908691406, |
|
"logps/chosen": -453.2012634277344, |
|
"logps/rejected": -650.4305419921875, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7032783031463623, |
|
"rewards/margins": 1.8229873180389404, |
|
"rewards/rejected": -3.5262656211853027, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 13.307257346339915, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": -0.5099418759346008, |
|
"logits/rejected": -0.5463215112686157, |
|
"logps/chosen": -423.4984436035156, |
|
"logps/rejected": -534.3104248046875, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6515445709228516, |
|
"rewards/margins": 1.3121881484985352, |
|
"rewards/rejected": -2.9637324810028076, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 10.351075268333476, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": -0.6528457403182983, |
|
"logits/rejected": -0.6615931987762451, |
|
"logps/chosen": -444.66717529296875, |
|
"logps/rejected": -533.7962646484375, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.675614595413208, |
|
"rewards/margins": 1.2561010122299194, |
|
"rewards/rejected": -2.931715488433838, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 9.443696454315829, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": -0.6959069967269897, |
|
"logits/rejected": -0.7350667715072632, |
|
"logps/chosen": -372.649658203125, |
|
"logps/rejected": -555.9948120117188, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5621325969696045, |
|
"rewards/margins": 1.445049524307251, |
|
"rewards/rejected": -3.0071818828582764, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 11.47296651628307, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": -0.6271109580993652, |
|
"logits/rejected": -0.6984132528305054, |
|
"logps/chosen": -474.30230712890625, |
|
"logps/rejected": -584.3538818359375, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6461492776870728, |
|
"rewards/margins": 1.4845248460769653, |
|
"rewards/rejected": -3.130673885345459, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 10.021909817688707, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": -0.6822315454483032, |
|
"logits/rejected": -0.6859509944915771, |
|
"logps/chosen": -464.59588623046875, |
|
"logps/rejected": -531.1356201171875, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.5760090351104736, |
|
"rewards/margins": 1.0710208415985107, |
|
"rewards/rejected": -2.6470298767089844, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 12.68094636270811, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": -0.6438357830047607, |
|
"logits/rejected": -0.6551543474197388, |
|
"logps/chosen": -383.98248291015625, |
|
"logps/rejected": -559.4468383789062, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.792620301246643, |
|
"rewards/margins": 1.217846155166626, |
|
"rewards/rejected": -3.0104660987854004, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 11.028927932236359, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": -0.5930813550949097, |
|
"logits/rejected": -0.5801911950111389, |
|
"logps/chosen": -542.3621826171875, |
|
"logps/rejected": -639.3001708984375, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.2047979831695557, |
|
"rewards/margins": 1.3231174945831299, |
|
"rewards/rejected": -3.5279152393341064, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 13.142799913341205, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": -0.6943696141242981, |
|
"logits/rejected": -0.7373479604721069, |
|
"logps/chosen": -370.00933837890625, |
|
"logps/rejected": -546.3580322265625, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5144741535186768, |
|
"rewards/margins": 1.7807083129882812, |
|
"rewards/rejected": -3.295182466506958, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 11.188015298374586, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": -0.6203271150588989, |
|
"logits/rejected": -0.6621488332748413, |
|
"logps/chosen": -396.5414123535156, |
|
"logps/rejected": -583.9200439453125, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.708814263343811, |
|
"rewards/margins": 1.800474762916565, |
|
"rewards/rejected": -3.509288787841797, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 6.520933471158698, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": -0.7140206694602966, |
|
"logits/rejected": -0.7158041000366211, |
|
"logps/chosen": -390.3932189941406, |
|
"logps/rejected": -608.0358276367188, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4705148935317993, |
|
"rewards/margins": 2.1252219676971436, |
|
"rewards/rejected": -3.5957369804382324, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 13.927678669681274, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": -0.6197787523269653, |
|
"logits/rejected": -0.6519285440444946, |
|
"logps/chosen": -416.1162109375, |
|
"logps/rejected": -562.5856323242188, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.56044602394104, |
|
"rewards/margins": 1.3187508583068848, |
|
"rewards/rejected": -2.879196882247925, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 10.074795778090818, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": -0.5864537358283997, |
|
"logits/rejected": -0.6371886730194092, |
|
"logps/chosen": -442.40313720703125, |
|
"logps/rejected": -615.7220458984375, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4601584672927856, |
|
"rewards/margins": 2.044390916824341, |
|
"rewards/rejected": -3.504549503326416, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 13.663011005109329, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": -0.6179635524749756, |
|
"logits/rejected": -0.6545384526252747, |
|
"logps/chosen": -452.73382568359375, |
|
"logps/rejected": -525.165283203125, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.757580041885376, |
|
"rewards/margins": 1.2399779558181763, |
|
"rewards/rejected": -2.9975578784942627, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 11.361503632955099, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": -0.5514404773712158, |
|
"logits/rejected": -0.5337514281272888, |
|
"logps/chosen": -404.6654052734375, |
|
"logps/rejected": -557.6652221679688, |
|
"loss": 0.4311, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5441641807556152, |
|
"rewards/margins": 1.4246327877044678, |
|
"rewards/rejected": -2.968797206878662, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 14.862474133401097, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -0.6156660318374634, |
|
"logits/rejected": -0.643264889717102, |
|
"logps/chosen": -428.32989501953125, |
|
"logps/rejected": -573.34130859375, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7375695705413818, |
|
"rewards/margins": 1.2411071062088013, |
|
"rewards/rejected": -2.9786763191223145, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 11.951225803984062, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": -0.6403513550758362, |
|
"logits/rejected": -0.6913474798202515, |
|
"logps/chosen": -411.776611328125, |
|
"logps/rejected": -524.7989501953125, |
|
"loss": 0.4563, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.625312089920044, |
|
"rewards/margins": 1.2939417362213135, |
|
"rewards/rejected": -2.9192535877227783, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 9.593855988626308, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": -0.7377493381500244, |
|
"logits/rejected": -0.728441596031189, |
|
"logps/chosen": -364.731201171875, |
|
"logps/rejected": -579.6549682617188, |
|
"loss": 0.4688, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6445392370224, |
|
"rewards/margins": 2.2165136337280273, |
|
"rewards/rejected": -3.861053466796875, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 12.640503406225628, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": -0.6615322828292847, |
|
"logits/rejected": -0.6496458053588867, |
|
"logps/chosen": -469.51007080078125, |
|
"logps/rejected": -604.8178100585938, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.678299903869629, |
|
"rewards/margins": 1.5208964347839355, |
|
"rewards/rejected": -3.1991963386535645, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 13.937512065135813, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": -0.6518770456314087, |
|
"logits/rejected": -0.6966899633407593, |
|
"logps/chosen": -453.48797607421875, |
|
"logps/rejected": -625.8817138671875, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9149278402328491, |
|
"rewards/margins": 1.3741127252578735, |
|
"rewards/rejected": -3.2890403270721436, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 11.229901391937847, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": -0.5851765871047974, |
|
"logits/rejected": -0.6159471273422241, |
|
"logps/chosen": -437.6891174316406, |
|
"logps/rejected": -598.682861328125, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.56214439868927, |
|
"rewards/margins": 1.7052253484725952, |
|
"rewards/rejected": -3.2673697471618652, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 11.146263352042403, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": -0.6397983431816101, |
|
"logits/rejected": -0.7075640559196472, |
|
"logps/chosen": -459.11553955078125, |
|
"logps/rejected": -686.8187255859375, |
|
"loss": 0.4385, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8990970849990845, |
|
"rewards/margins": 2.3527369499206543, |
|
"rewards/rejected": -4.251833915710449, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 11.067281012027754, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": -0.702612042427063, |
|
"logits/rejected": -0.7422297596931458, |
|
"logps/chosen": -440.0489196777344, |
|
"logps/rejected": -574.8292846679688, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7014598846435547, |
|
"rewards/margins": 1.6431381702423096, |
|
"rewards/rejected": -3.3445980548858643, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 9.830989747261166, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": -0.5348180532455444, |
|
"logits/rejected": -0.5696184039115906, |
|
"logps/chosen": -442.927490234375, |
|
"logps/rejected": -583.0240478515625, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.906508445739746, |
|
"rewards/margins": 1.4029037952423096, |
|
"rewards/rejected": -3.3094124794006348, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 12.175693604464533, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": -0.5926901698112488, |
|
"logits/rejected": -0.6048527956008911, |
|
"logps/chosen": -458.3211975097656, |
|
"logps/rejected": -586.0518798828125, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8188337087631226, |
|
"rewards/margins": 1.2453868389129639, |
|
"rewards/rejected": -3.064220905303955, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 11.750711543669468, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": -0.6148731112480164, |
|
"logits/rejected": -0.6050759553909302, |
|
"logps/chosen": -457.68878173828125, |
|
"logps/rejected": -555.8333740234375, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.960931420326233, |
|
"rewards/margins": 1.0489509105682373, |
|
"rewards/rejected": -3.0098819732666016, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 13.919290209857113, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": -0.6457343101501465, |
|
"logits/rejected": -0.6977934241294861, |
|
"logps/chosen": -435.4483947753906, |
|
"logps/rejected": -547.3876953125, |
|
"loss": 0.4771, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3289425373077393, |
|
"rewards/margins": 1.8005937337875366, |
|
"rewards/rejected": -3.1295368671417236, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 12.774155044726525, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": -0.5455335378646851, |
|
"logits/rejected": -0.569166362285614, |
|
"logps/chosen": -488.89923095703125, |
|
"logps/rejected": -631.3294677734375, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8063256740570068, |
|
"rewards/margins": 1.6628071069717407, |
|
"rewards/rejected": -3.469132661819458, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 11.394655126013888, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": -0.5993139147758484, |
|
"logits/rejected": -0.6322329044342041, |
|
"logps/chosen": -519.2103271484375, |
|
"logps/rejected": -596.4190063476562, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7323102951049805, |
|
"rewards/margins": 1.5095983743667603, |
|
"rewards/rejected": -3.241908550262451, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 12.200878623728958, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": -0.6741082668304443, |
|
"logits/rejected": -0.7074322700500488, |
|
"logps/chosen": -373.7408447265625, |
|
"logps/rejected": -486.2967224121094, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4916250705718994, |
|
"rewards/margins": 1.3450305461883545, |
|
"rewards/rejected": -2.836656093597412, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 10.453220701732908, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": -0.651732325553894, |
|
"logits/rejected": -0.6395163536071777, |
|
"logps/chosen": -382.8707580566406, |
|
"logps/rejected": -592.2684936523438, |
|
"loss": 0.4593, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6059911251068115, |
|
"rewards/margins": 1.7453718185424805, |
|
"rewards/rejected": -3.351362943649292, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 11.15644450768947, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": -0.58311527967453, |
|
"logits/rejected": -0.6077650189399719, |
|
"logps/chosen": -422.38287353515625, |
|
"logps/rejected": -605.2293701171875, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6799404621124268, |
|
"rewards/margins": 1.5105533599853516, |
|
"rewards/rejected": -3.1904940605163574, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 13.646112951195525, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": -0.520195722579956, |
|
"logits/rejected": -0.5245386362075806, |
|
"logps/chosen": -505.70819091796875, |
|
"logps/rejected": -675.5585327148438, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0382139682769775, |
|
"rewards/margins": 1.3585295677185059, |
|
"rewards/rejected": -3.3967432975769043, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 10.108220217158234, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": -0.6458074450492859, |
|
"logits/rejected": -0.7096244096755981, |
|
"logps/chosen": -390.3080139160156, |
|
"logps/rejected": -658.1427001953125, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7075704336166382, |
|
"rewards/margins": 2.7826123237609863, |
|
"rewards/rejected": -4.490181922912598, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 12.66986825653512, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": -0.5679661631584167, |
|
"logits/rejected": -0.5969215631484985, |
|
"logps/chosen": -402.6626892089844, |
|
"logps/rejected": -498.47479248046875, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8141701221466064, |
|
"rewards/margins": 0.9520853757858276, |
|
"rewards/rejected": -2.7662553787231445, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 9.834215615688265, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": -0.5988723039627075, |
|
"logits/rejected": -0.6316601037979126, |
|
"logps/chosen": -566.8046875, |
|
"logps/rejected": -627.3985595703125, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.795799970626831, |
|
"rewards/margins": 1.456854224205017, |
|
"rewards/rejected": -3.2526543140411377, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 11.502139473474164, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": -0.5939972996711731, |
|
"logits/rejected": -0.5957666635513306, |
|
"logps/chosen": -431.6761169433594, |
|
"logps/rejected": -533.3076171875, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6297683715820312, |
|
"rewards/margins": 1.2114887237548828, |
|
"rewards/rejected": -2.841256856918335, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 9.038305230217524, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": -0.598876416683197, |
|
"logits/rejected": -0.6024787425994873, |
|
"logps/chosen": -451.1143493652344, |
|
"logps/rejected": -610.4466552734375, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8450918197631836, |
|
"rewards/margins": 1.7127292156219482, |
|
"rewards/rejected": -3.5578207969665527, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 17.498105828738847, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": -0.5433209538459778, |
|
"logits/rejected": -0.5503520965576172, |
|
"logps/chosen": -476.6543884277344, |
|
"logps/rejected": -635.9801635742188, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6800941228866577, |
|
"rewards/margins": 1.8871549367904663, |
|
"rewards/rejected": -3.567249298095703, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 12.97369382211523, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": -0.5857795476913452, |
|
"logits/rejected": -0.6014319658279419, |
|
"logps/chosen": -460.88348388671875, |
|
"logps/rejected": -596.8003540039062, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8348299264907837, |
|
"rewards/margins": 1.2898591756820679, |
|
"rewards/rejected": -3.1246893405914307, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 12.980103635170739, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": -0.5943895578384399, |
|
"logits/rejected": -0.6202970743179321, |
|
"logps/chosen": -484.2207946777344, |
|
"logps/rejected": -530.2681884765625, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8965202569961548, |
|
"rewards/margins": 1.0006572008132935, |
|
"rewards/rejected": -2.897177219390869, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 13.02526735064428, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": -0.6043378114700317, |
|
"logits/rejected": -0.6147600412368774, |
|
"logps/chosen": -457.24072265625, |
|
"logps/rejected": -620.4854125976562, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8802976608276367, |
|
"rewards/margins": 1.4927122592926025, |
|
"rewards/rejected": -3.3730101585388184, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 10.240226182122116, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": -0.6241481304168701, |
|
"logits/rejected": -0.6314017176628113, |
|
"logps/chosen": -401.0560302734375, |
|
"logps/rejected": -634.5782470703125, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.427109956741333, |
|
"rewards/margins": 2.2766079902648926, |
|
"rewards/rejected": -3.7037181854248047, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 12.450431309081564, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": -0.5069397687911987, |
|
"logits/rejected": -0.5848828554153442, |
|
"logps/chosen": -474.01043701171875, |
|
"logps/rejected": -627.2423706054688, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.224709987640381, |
|
"rewards/margins": 1.437635898590088, |
|
"rewards/rejected": -3.6623454093933105, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 10.138959991207273, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": -0.6470298171043396, |
|
"logits/rejected": -0.6826261878013611, |
|
"logps/chosen": -513.265869140625, |
|
"logps/rejected": -608.2982177734375, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9213802814483643, |
|
"rewards/margins": 1.5853230953216553, |
|
"rewards/rejected": -3.5067031383514404, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 7.737508793759033, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": -0.5882548093795776, |
|
"logits/rejected": -0.6023901104927063, |
|
"logps/chosen": -484.13787841796875, |
|
"logps/rejected": -623.9624633789062, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8149983882904053, |
|
"rewards/margins": 1.4490883350372314, |
|
"rewards/rejected": -3.2640864849090576, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 9.568696750818255, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": -0.5902693867683411, |
|
"logits/rejected": -0.6269119381904602, |
|
"logps/chosen": -477.4012756347656, |
|
"logps/rejected": -554.7974853515625, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.74009108543396, |
|
"rewards/margins": 1.0817813873291016, |
|
"rewards/rejected": -2.8218724727630615, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 12.00695207332559, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": -0.6704959273338318, |
|
"logits/rejected": -0.6980074644088745, |
|
"logps/chosen": -379.5652770996094, |
|
"logps/rejected": -551.5906372070312, |
|
"loss": 0.4273, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.503880500793457, |
|
"rewards/margins": 1.5545501708984375, |
|
"rewards/rejected": -3.0584301948547363, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 9.257056695952906, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": -0.583393394947052, |
|
"logits/rejected": -0.626370370388031, |
|
"logps/chosen": -371.80194091796875, |
|
"logps/rejected": -579.4976806640625, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.546886920928955, |
|
"rewards/margins": 1.578477382659912, |
|
"rewards/rejected": -3.1253647804260254, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 14.494964173795129, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": -0.610164999961853, |
|
"logits/rejected": -0.6447314023971558, |
|
"logps/chosen": -432.84210205078125, |
|
"logps/rejected": -672.5138549804688, |
|
"loss": 0.419, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7256730794906616, |
|
"rewards/margins": 2.3827433586120605, |
|
"rewards/rejected": -4.1084160804748535, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 10.946709475218693, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": -0.6309023499488831, |
|
"logits/rejected": -0.6657734513282776, |
|
"logps/chosen": -481.0071716308594, |
|
"logps/rejected": -643.405029296875, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7343776226043701, |
|
"rewards/margins": 1.648224115371704, |
|
"rewards/rejected": -3.3826019763946533, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 10.689900945283165, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": -0.618683934211731, |
|
"logits/rejected": -0.5845375657081604, |
|
"logps/chosen": -405.7684631347656, |
|
"logps/rejected": -498.9336853027344, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7307859659194946, |
|
"rewards/margins": 0.9967991709709167, |
|
"rewards/rejected": -2.7275853157043457, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 12.01909548635221, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": -0.5815375447273254, |
|
"logits/rejected": -0.62933748960495, |
|
"logps/chosen": -413.2583923339844, |
|
"logps/rejected": -569.9872436523438, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.719911813735962, |
|
"rewards/margins": 1.5588918924331665, |
|
"rewards/rejected": -3.278803586959839, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 11.866805408924924, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": -0.57561856508255, |
|
"logits/rejected": -0.6152477264404297, |
|
"logps/chosen": -487.6710510253906, |
|
"logps/rejected": -560.8670043945312, |
|
"loss": 0.4308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7181422710418701, |
|
"rewards/margins": 1.274820327758789, |
|
"rewards/rejected": -2.992962598800659, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 11.368661908364057, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": -0.6161478757858276, |
|
"logits/rejected": -0.621782660484314, |
|
"logps/chosen": -455.6564025878906, |
|
"logps/rejected": -615.894775390625, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.038883686065674, |
|
"rewards/margins": 1.1252130270004272, |
|
"rewards/rejected": -3.1640963554382324, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 16.14783859072051, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": -0.6006742715835571, |
|
"logits/rejected": -0.6039419770240784, |
|
"logps/chosen": -416.3548278808594, |
|
"logps/rejected": -545.7139892578125, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8900909423828125, |
|
"rewards/margins": 1.278205394744873, |
|
"rewards/rejected": -3.1682963371276855, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 11.990331851376549, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": -0.624252200126648, |
|
"logits/rejected": -0.7020074725151062, |
|
"logps/chosen": -399.41583251953125, |
|
"logps/rejected": -661.79638671875, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.495900273323059, |
|
"rewards/margins": 2.714350461959839, |
|
"rewards/rejected": -4.210250377655029, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 14.100883523011712, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": -0.58869868516922, |
|
"logits/rejected": -0.6457717418670654, |
|
"logps/chosen": -384.4469299316406, |
|
"logps/rejected": -560.6753540039062, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7529369592666626, |
|
"rewards/margins": 1.4963237047195435, |
|
"rewards/rejected": -3.249260425567627, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 14.528120128162028, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": -0.6547173857688904, |
|
"logits/rejected": -0.7049331068992615, |
|
"logps/chosen": -343.5353698730469, |
|
"logps/rejected": -484.39617919921875, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.712456464767456, |
|
"rewards/margins": 1.597670316696167, |
|
"rewards/rejected": -3.310126781463623, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 14.220779667409527, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": -0.6767258644104004, |
|
"logits/rejected": -0.7082260847091675, |
|
"logps/chosen": -444.30328369140625, |
|
"logps/rejected": -614.4713745117188, |
|
"loss": 0.4496, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7594902515411377, |
|
"rewards/margins": 2.157029628753662, |
|
"rewards/rejected": -3.9165198802948, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 11.643290676232011, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": -0.6050413846969604, |
|
"logits/rejected": -0.6035085916519165, |
|
"logps/chosen": -438.84942626953125, |
|
"logps/rejected": -599.5989990234375, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.013329029083252, |
|
"rewards/margins": 1.2097079753875732, |
|
"rewards/rejected": -3.223036527633667, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 12.836965077883955, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": -0.7066579461097717, |
|
"logits/rejected": -0.7247270941734314, |
|
"logps/chosen": -371.9989929199219, |
|
"logps/rejected": -582.3410034179688, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5276857614517212, |
|
"rewards/margins": 1.909120798110962, |
|
"rewards/rejected": -3.4368062019348145, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 11.52899292103731, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": -0.7156012654304504, |
|
"logits/rejected": -0.7498332262039185, |
|
"logps/chosen": -447.65216064453125, |
|
"logps/rejected": -560.2888793945312, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9097896814346313, |
|
"rewards/margins": 1.5398523807525635, |
|
"rewards/rejected": -3.449641704559326, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 13.634086015337521, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": -0.5250085592269897, |
|
"logits/rejected": -0.57183438539505, |
|
"logps/chosen": -402.1532287597656, |
|
"logps/rejected": -514.146484375, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7628848552703857, |
|
"rewards/margins": 0.9792253375053406, |
|
"rewards/rejected": -2.7421107292175293, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 10.931227876480555, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": -0.6632574796676636, |
|
"logits/rejected": -0.6789246201515198, |
|
"logps/chosen": -438.82598876953125, |
|
"logps/rejected": -618.0784912109375, |
|
"loss": 0.4579, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6889044046401978, |
|
"rewards/margins": 1.663873314857483, |
|
"rewards/rejected": -3.3527779579162598, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 12.88089932313707, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": -0.6466517448425293, |
|
"logits/rejected": -0.6885952949523926, |
|
"logps/chosen": -386.842529296875, |
|
"logps/rejected": -563.5064697265625, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6945703029632568, |
|
"rewards/margins": 1.814061164855957, |
|
"rewards/rejected": -3.508631467819214, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 11.33604603257016, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": -0.5263174772262573, |
|
"logits/rejected": -0.5546278953552246, |
|
"logps/chosen": -467.58062744140625, |
|
"logps/rejected": -575.5345458984375, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8927171230316162, |
|
"rewards/margins": 1.3500896692276, |
|
"rewards/rejected": -3.2428061962127686, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 10.620251504859734, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": -0.5253115892410278, |
|
"logits/rejected": -0.5563468933105469, |
|
"logps/chosen": -434.1669921875, |
|
"logps/rejected": -624.7203369140625, |
|
"loss": 0.4168, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.62359619140625, |
|
"rewards/margins": 2.1972010135650635, |
|
"rewards/rejected": -3.8207976818084717, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 13.275739947749992, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": -0.6284958124160767, |
|
"logits/rejected": -0.6665322780609131, |
|
"logps/chosen": -392.99346923828125, |
|
"logps/rejected": -562.0194091796875, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7423441410064697, |
|
"rewards/margins": 1.6086766719818115, |
|
"rewards/rejected": -3.3510212898254395, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 15.530425079669428, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": -0.5325186252593994, |
|
"logits/rejected": -0.5298448204994202, |
|
"logps/chosen": -506.15118408203125, |
|
"logps/rejected": -606.4880981445312, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7241334915161133, |
|
"rewards/margins": 1.3581666946411133, |
|
"rewards/rejected": -3.0822999477386475, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 10.19750465326913, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": -0.6508103609085083, |
|
"logits/rejected": -0.6848149299621582, |
|
"logps/chosen": -467.98046875, |
|
"logps/rejected": -680.7239379882812, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8734798431396484, |
|
"rewards/margins": 1.8285648822784424, |
|
"rewards/rejected": -3.702044725418091, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 12.346658548809334, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": -0.5491658449172974, |
|
"logits/rejected": -0.5658199787139893, |
|
"logps/chosen": -436.49505615234375, |
|
"logps/rejected": -605.0525512695312, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5681920051574707, |
|
"rewards/margins": 1.4636965990066528, |
|
"rewards/rejected": -3.031888484954834, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 8.029053228209763, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": -0.6819595098495483, |
|
"logits/rejected": -0.6787452101707458, |
|
"logps/chosen": -416.7828063964844, |
|
"logps/rejected": -647.9034423828125, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8057578802108765, |
|
"rewards/margins": 2.299898862838745, |
|
"rewards/rejected": -4.105656623840332, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 12.064654099310772, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": -0.6439425349235535, |
|
"logits/rejected": -0.7118849158287048, |
|
"logps/chosen": -462.96624755859375, |
|
"logps/rejected": -571.2410278320312, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6763248443603516, |
|
"rewards/margins": 1.4996883869171143, |
|
"rewards/rejected": -3.176013469696045, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 11.710340217031446, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": -0.4598866403102875, |
|
"logits/rejected": -0.5065708756446838, |
|
"logps/chosen": -502.1546936035156, |
|
"logps/rejected": -631.9319458007812, |
|
"loss": 0.4355, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6645265817642212, |
|
"rewards/margins": 1.4170030355453491, |
|
"rewards/rejected": -3.0815296173095703, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 10.967428634066259, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": -0.5946656465530396, |
|
"logits/rejected": -0.6331689953804016, |
|
"logps/chosen": -513.2894287109375, |
|
"logps/rejected": -712.6598510742188, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.061929225921631, |
|
"rewards/margins": 1.8402255773544312, |
|
"rewards/rejected": -3.9021544456481934, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 12.665118166759022, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": -0.5937948226928711, |
|
"logits/rejected": -0.6147378087043762, |
|
"logps/chosen": -453.622314453125, |
|
"logps/rejected": -509.7294006347656, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6426572799682617, |
|
"rewards/margins": 1.0369822978973389, |
|
"rewards/rejected": -2.6796395778656006, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 13.103880470124455, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": -0.6876846551895142, |
|
"logits/rejected": -0.7068900465965271, |
|
"logps/chosen": -422.0281677246094, |
|
"logps/rejected": -667.2548828125, |
|
"loss": 0.448, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5988212823867798, |
|
"rewards/margins": 2.381234645843506, |
|
"rewards/rejected": -3.980056047439575, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 8.997982934710759, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": -0.6352418661117554, |
|
"logits/rejected": -0.6384015083312988, |
|
"logps/chosen": -425.876220703125, |
|
"logps/rejected": -567.1570434570312, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.594925045967102, |
|
"rewards/margins": 1.3477894067764282, |
|
"rewards/rejected": -2.9427144527435303, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 11.532295976661894, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": -0.5964576601982117, |
|
"logits/rejected": -0.6410446763038635, |
|
"logps/chosen": -464.87823486328125, |
|
"logps/rejected": -694.9117431640625, |
|
"loss": 0.4116, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9001286029815674, |
|
"rewards/margins": 1.8672752380371094, |
|
"rewards/rejected": -3.767404079437256, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 11.154793279181051, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": -0.5532232522964478, |
|
"logits/rejected": -0.537521481513977, |
|
"logps/chosen": -463.60540771484375, |
|
"logps/rejected": -587.6224365234375, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8449652194976807, |
|
"rewards/margins": 1.1032222509384155, |
|
"rewards/rejected": -2.9481875896453857, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 11.05082468706879, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": -0.546120822429657, |
|
"logits/rejected": -0.5496604442596436, |
|
"logps/chosen": -394.8270568847656, |
|
"logps/rejected": -661.518310546875, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7090051174163818, |
|
"rewards/margins": 2.0601634979248047, |
|
"rewards/rejected": -3.7691688537597656, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 12.11334503986506, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": -0.6160927414894104, |
|
"logits/rejected": -0.5806897878646851, |
|
"logps/chosen": -448.4591369628906, |
|
"logps/rejected": -560.7288818359375, |
|
"loss": 0.4697, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6977341175079346, |
|
"rewards/margins": 0.6812986135482788, |
|
"rewards/rejected": -2.379032611846924, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 11.544284346542328, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": -0.6722389459609985, |
|
"logits/rejected": -0.7193390727043152, |
|
"logps/chosen": -424.52545166015625, |
|
"logps/rejected": -611.8565063476562, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7004144191741943, |
|
"rewards/margins": 2.1898179054260254, |
|
"rewards/rejected": -3.890232801437378, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 11.715147679206341, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": -0.5881049633026123, |
|
"logits/rejected": -0.6139761805534363, |
|
"logps/chosen": -464.87908935546875, |
|
"logps/rejected": -680.6104125976562, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7303098440170288, |
|
"rewards/margins": 2.0931639671325684, |
|
"rewards/rejected": -3.8234734535217285, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 10.400118656620084, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": -0.6110928058624268, |
|
"logits/rejected": -0.6207016706466675, |
|
"logps/chosen": -360.6642761230469, |
|
"logps/rejected": -565.9451904296875, |
|
"loss": 0.427, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2418937683105469, |
|
"rewards/margins": 1.8387107849121094, |
|
"rewards/rejected": -3.0806050300598145, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 10.41650137772363, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": -0.5888563394546509, |
|
"logits/rejected": -0.6184204816818237, |
|
"logps/chosen": -425.99176025390625, |
|
"logps/rejected": -601.2418823242188, |
|
"loss": 0.4341, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6858470439910889, |
|
"rewards/margins": 2.0192112922668457, |
|
"rewards/rejected": -3.7050583362579346, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 11.8325653977223, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": -0.627325713634491, |
|
"logits/rejected": -0.6293385028839111, |
|
"logps/chosen": -477.244140625, |
|
"logps/rejected": -571.5916748046875, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9243043661117554, |
|
"rewards/margins": 1.038652777671814, |
|
"rewards/rejected": -2.9629569053649902, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 12.78783329314139, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": -0.5876864194869995, |
|
"logits/rejected": -0.6184590458869934, |
|
"logps/chosen": -502.6764221191406, |
|
"logps/rejected": -607.631103515625, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.936902403831482, |
|
"rewards/margins": 1.1025243997573853, |
|
"rewards/rejected": -3.0394270420074463, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 9.281745185619053, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": -0.6346616148948669, |
|
"logits/rejected": -0.6384531855583191, |
|
"logps/chosen": -471.59912109375, |
|
"logps/rejected": -625.6812133789062, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.560572862625122, |
|
"rewards/margins": 1.6168543100357056, |
|
"rewards/rejected": -3.177427291870117, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 11.774371944259814, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": -0.6360484957695007, |
|
"logits/rejected": -0.6688522696495056, |
|
"logps/chosen": -415.83685302734375, |
|
"logps/rejected": -604.4151611328125, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7383111715316772, |
|
"rewards/margins": 1.9439513683319092, |
|
"rewards/rejected": -3.682262420654297, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 9.746861342706476, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": -0.6292358040809631, |
|
"logits/rejected": -0.5761995911598206, |
|
"logps/chosen": -410.7013244628906, |
|
"logps/rejected": -591.5662841796875, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7394983768463135, |
|
"rewards/margins": 1.2296544313430786, |
|
"rewards/rejected": -2.9691526889801025, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 11.778423651753915, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": -0.6809018850326538, |
|
"logits/rejected": -0.695138692855835, |
|
"logps/chosen": -382.27337646484375, |
|
"logps/rejected": -545.0702514648438, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.746303915977478, |
|
"rewards/margins": 1.5834695100784302, |
|
"rewards/rejected": -3.32977294921875, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 12.78112886016458, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": -0.6508705615997314, |
|
"logits/rejected": -0.6388789415359497, |
|
"logps/chosen": -406.3719482421875, |
|
"logps/rejected": -647.2185668945312, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6541764736175537, |
|
"rewards/margins": 2.194026231765747, |
|
"rewards/rejected": -3.8482024669647217, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 11.56058546078559, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": -0.5227060914039612, |
|
"logits/rejected": -0.5568557977676392, |
|
"logps/chosen": -435.97833251953125, |
|
"logps/rejected": -572.4579467773438, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5814177989959717, |
|
"rewards/margins": 1.316449522972107, |
|
"rewards/rejected": -2.897867441177368, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 10.601860302681265, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": -0.6122329235076904, |
|
"logits/rejected": -0.6286668181419373, |
|
"logps/chosen": -524.0120849609375, |
|
"logps/rejected": -665.0354614257812, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8280586004257202, |
|
"rewards/margins": 1.3872017860412598, |
|
"rewards/rejected": -3.2152607440948486, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": -0.6084198951721191, |
|
"eval_logits/rejected": -0.626106858253479, |
|
"eval_logps/chosen": -439.457275390625, |
|
"eval_logps/rejected": -623.4827270507812, |
|
"eval_loss": 0.43927037715911865, |
|
"eval_rewards/accuracies": 0.8285714387893677, |
|
"eval_rewards/chosen": -1.7351824045181274, |
|
"eval_rewards/margins": 1.7476173639297485, |
|
"eval_rewards/rejected": -3.482799530029297, |
|
"eval_runtime": 205.925, |
|
"eval_samples_per_second": 21.663, |
|
"eval_steps_per_second": 0.34, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 12.00737344425498, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": -0.6551751494407654, |
|
"logits/rejected": -0.6754254102706909, |
|
"logps/chosen": -384.97015380859375, |
|
"logps/rejected": -610.7628784179688, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6510511636734009, |
|
"rewards/margins": 2.127711296081543, |
|
"rewards/rejected": -3.7787623405456543, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 13.508620118248077, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": -0.5892629623413086, |
|
"logits/rejected": -0.6146517395973206, |
|
"logps/chosen": -518.9400024414062, |
|
"logps/rejected": -719.6813354492188, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4437657594680786, |
|
"rewards/margins": 1.971710205078125, |
|
"rewards/rejected": -3.4154763221740723, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 10.39563083030194, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": -0.5608581304550171, |
|
"logits/rejected": -0.6161444187164307, |
|
"logps/chosen": -488.674560546875, |
|
"logps/rejected": -594.23681640625, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8151352405548096, |
|
"rewards/margins": 1.053483247756958, |
|
"rewards/rejected": -2.8686180114746094, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 11.778106767916414, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": -0.5647310018539429, |
|
"logits/rejected": -0.6243175864219666, |
|
"logps/chosen": -465.7294006347656, |
|
"logps/rejected": -713.4627685546875, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9448398351669312, |
|
"rewards/margins": 2.4202308654785156, |
|
"rewards/rejected": -4.365070819854736, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 13.924155842753079, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": -0.5596794486045837, |
|
"logits/rejected": -0.5762395858764648, |
|
"logps/chosen": -459.1459045410156, |
|
"logps/rejected": -605.2967529296875, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7754650115966797, |
|
"rewards/margins": 1.3398131132125854, |
|
"rewards/rejected": -3.1152782440185547, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 10.101674481691141, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": -0.6857717633247375, |
|
"logits/rejected": -0.67542564868927, |
|
"logps/chosen": -366.9244384765625, |
|
"logps/rejected": -573.9992065429688, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5600941181182861, |
|
"rewards/margins": 1.933546781539917, |
|
"rewards/rejected": -3.493640899658203, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 10.9782818923637, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": -0.5480167269706726, |
|
"logits/rejected": -0.551671028137207, |
|
"logps/chosen": -435.30120849609375, |
|
"logps/rejected": -583.2462158203125, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.900551438331604, |
|
"rewards/margins": 1.5537331104278564, |
|
"rewards/rejected": -3.45428466796875, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 13.709217546306082, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": -0.5955997705459595, |
|
"logits/rejected": -0.5746399760246277, |
|
"logps/chosen": -457.44512939453125, |
|
"logps/rejected": -655.3438110351562, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.843754529953003, |
|
"rewards/margins": 1.7090778350830078, |
|
"rewards/rejected": -3.5528323650360107, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 15.002773863211088, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": -0.7055156826972961, |
|
"logits/rejected": -0.7228876352310181, |
|
"logps/chosen": -376.8843994140625, |
|
"logps/rejected": -592.8224487304688, |
|
"loss": 0.4463, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6472818851470947, |
|
"rewards/margins": 1.8167731761932373, |
|
"rewards/rejected": -3.464055299758911, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 13.389436536453555, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": -0.5252457857131958, |
|
"logits/rejected": -0.5572882890701294, |
|
"logps/chosen": -437.04620361328125, |
|
"logps/rejected": -581.270751953125, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8863855600357056, |
|
"rewards/margins": 1.4563047885894775, |
|
"rewards/rejected": -3.3426902294158936, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 9.112429191727882, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": -0.49874311685562134, |
|
"logits/rejected": -0.5566374063491821, |
|
"logps/chosen": -467.4441833496094, |
|
"logps/rejected": -625.98681640625, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7639166116714478, |
|
"rewards/margins": 1.2108697891235352, |
|
"rewards/rejected": -2.9747862815856934, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 10.18504682387471, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": -0.5316934585571289, |
|
"logits/rejected": -0.5874772071838379, |
|
"logps/chosen": -509.3759765625, |
|
"logps/rejected": -588.4547119140625, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8416917324066162, |
|
"rewards/margins": 1.1702340841293335, |
|
"rewards/rejected": -3.0119261741638184, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 9.481256396419454, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": -0.593826413154602, |
|
"logits/rejected": -0.6198239326477051, |
|
"logps/chosen": -489.977294921875, |
|
"logps/rejected": -679.3530883789062, |
|
"loss": 0.42, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5848186016082764, |
|
"rewards/margins": 2.1017608642578125, |
|
"rewards/rejected": -3.686579465866089, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 14.774926148490673, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": -0.6392898559570312, |
|
"logits/rejected": -0.65348219871521, |
|
"logps/chosen": -449.4820251464844, |
|
"logps/rejected": -634.1976318359375, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.4988961219787598, |
|
"rewards/margins": 2.359536647796631, |
|
"rewards/rejected": -3.858433246612549, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 13.569592726410221, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": -0.6521973609924316, |
|
"logits/rejected": -0.6605287194252014, |
|
"logps/chosen": -451.91217041015625, |
|
"logps/rejected": -617.4226684570312, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6109716892242432, |
|
"rewards/margins": 1.5008330345153809, |
|
"rewards/rejected": -3.111804485321045, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 14.763770162561924, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": -0.5573083162307739, |
|
"logits/rejected": -0.5667176246643066, |
|
"logps/chosen": -397.83111572265625, |
|
"logps/rejected": -570.945068359375, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6926639080047607, |
|
"rewards/margins": 1.5600066184997559, |
|
"rewards/rejected": -3.2526707649230957, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5098277106738136, |
|
"train_runtime": 16630.655, |
|
"train_samples_per_second": 8.019, |
|
"train_steps_per_second": 0.251 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|