|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 1911, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005232862375719519, |
|
"grad_norm": 8.940283023609332, |
|
"learning_rate": 2.6041666666666664e-09, |
|
"logits/chosen": -3.4411821365356445, |
|
"logits/rejected": -3.41083025932312, |
|
"logps/chosen": -501.4610595703125, |
|
"logps/rejected": -596.95849609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0052328623757195184, |
|
"grad_norm": 8.689551063328278, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -3.0671932697296143, |
|
"logits/rejected": -3.0745370388031006, |
|
"logps/chosen": -335.75750732421875, |
|
"logps/rejected": -280.19635009765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": -0.000958989083301276, |
|
"rewards/margins": -0.0004560473607853055, |
|
"rewards/rejected": -0.0005029416061006486, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 8.212994986770337, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -3.061262845993042, |
|
"logits/rejected": -3.061492443084717, |
|
"logps/chosen": -226.217529296875, |
|
"logps/rejected": -215.25961303710938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.00020114154904149473, |
|
"rewards/margins": -0.0007163770496845245, |
|
"rewards/rejected": 0.0005152354133315384, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 8.003664804799678, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": -2.958186388015747, |
|
"logits/rejected": -2.946017026901245, |
|
"logps/chosen": -300.97979736328125, |
|
"logps/rejected": -276.68634033203125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0008273817365989089, |
|
"rewards/margins": 0.001302235876210034, |
|
"rewards/rejected": -0.00047485390678048134, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 8.325071471418028, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -3.1442675590515137, |
|
"logits/rejected": -3.0619583129882812, |
|
"logps/chosen": -316.81396484375, |
|
"logps/rejected": -308.1053771972656, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0002774396270979196, |
|
"rewards/margins": -0.0006835688254795969, |
|
"rewards/rejected": 0.000406129052862525, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.026164311878597593, |
|
"grad_norm": 8.866152231216553, |
|
"learning_rate": 1.3020833333333334e-07, |
|
"logits/chosen": -3.1651201248168945, |
|
"logits/rejected": -3.0696120262145996, |
|
"logps/chosen": -297.9878845214844, |
|
"logps/rejected": -271.53094482421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00013881332415621728, |
|
"rewards/margins": -0.0007885316153988242, |
|
"rewards/rejected": 0.0006497182184830308, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 8.702138127987917, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -3.0590338706970215, |
|
"logits/rejected": -3.033952236175537, |
|
"logps/chosen": -271.07489013671875, |
|
"logps/rejected": -266.8861389160156, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0012097412254661322, |
|
"rewards/margins": -0.0009210550342686474, |
|
"rewards/rejected": -0.00028868610388599336, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 9.17847945114856, |
|
"learning_rate": 1.8229166666666666e-07, |
|
"logits/chosen": -3.159991979598999, |
|
"logits/rejected": -3.108083963394165, |
|
"logps/chosen": -331.7613830566406, |
|
"logps/rejected": -266.53192138671875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0022141693625599146, |
|
"rewards/margins": 0.004044364206492901, |
|
"rewards/rejected": -0.0018301953095942736, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 8.70085934375087, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -3.105524778366089, |
|
"logits/rejected": -3.0857417583465576, |
|
"logps/chosen": -309.269287109375, |
|
"logps/rejected": -304.593505859375, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0005593494279310107, |
|
"rewards/margins": 0.0006081314058974385, |
|
"rewards/rejected": -0.0011674808338284492, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 7.70370023214065, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": -3.1123859882354736, |
|
"logits/rejected": -3.0873000621795654, |
|
"logps/chosen": -244.9102783203125, |
|
"logps/rejected": -213.2863311767578, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.003876964095979929, |
|
"rewards/margins": 0.003002858255058527, |
|
"rewards/rejected": 0.0008741060155443847, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 7.584430634088357, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -3.119906425476074, |
|
"logits/rejected": -3.0290744304656982, |
|
"logps/chosen": -222.6891326904297, |
|
"logps/rejected": -199.02757263183594, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0010819355957210064, |
|
"rewards/margins": 0.0032298602163791656, |
|
"rewards/rejected": -0.0021479243878275156, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0575614861329147, |
|
"grad_norm": 8.205907431125933, |
|
"learning_rate": 2.864583333333333e-07, |
|
"logits/chosen": -3.0188896656036377, |
|
"logits/rejected": -2.9393975734710693, |
|
"logps/chosen": -261.2885437011719, |
|
"logps/rejected": -197.14413452148438, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.00237687723711133, |
|
"rewards/margins": 0.006252645049244165, |
|
"rewards/rejected": -0.003875765949487686, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 8.123874667175091, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -3.150888681411743, |
|
"logits/rejected": -3.160604953765869, |
|
"logps/chosen": -369.46343994140625, |
|
"logps/rejected": -341.2452392578125, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.002890329109504819, |
|
"rewards/margins": 0.0010580271482467651, |
|
"rewards/rejected": 0.0018323017284274101, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 7.876000482657188, |
|
"learning_rate": 3.3854166666666667e-07, |
|
"logits/chosen": -3.0063118934631348, |
|
"logits/rejected": -3.01861834526062, |
|
"logps/chosen": -227.5029296875, |
|
"logps/rejected": -238.10140991210938, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.004924282897263765, |
|
"rewards/margins": 0.012934369035065174, |
|
"rewards/rejected": -0.008010086603462696, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 8.190955614521185, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.970432996749878, |
|
"logits/rejected": -2.9969277381896973, |
|
"logps/chosen": -298.6478576660156, |
|
"logps/rejected": -310.3978271484375, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.009556067176163197, |
|
"rewards/margins": 0.02303231693804264, |
|
"rewards/rejected": -0.013476249761879444, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 9.052012593627612, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": -3.103742837905884, |
|
"logits/rejected": -3.0245158672332764, |
|
"logps/chosen": -263.4891662597656, |
|
"logps/rejected": -248.58151245117188, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.002595582278445363, |
|
"rewards/margins": 0.0049351779744029045, |
|
"rewards/rejected": -0.007530760020017624, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 8.432630883076405, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -3.114664077758789, |
|
"logits/rejected": -3.012159824371338, |
|
"logps/chosen": -262.4814453125, |
|
"logps/rejected": -236.3439178466797, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.009164141491055489, |
|
"rewards/margins": 0.02413203939795494, |
|
"rewards/rejected": -0.014967897906899452, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08895866038723181, |
|
"grad_norm": 9.124712051546432, |
|
"learning_rate": 4.427083333333333e-07, |
|
"logits/chosen": -3.08809494972229, |
|
"logits/rejected": -3.0950043201446533, |
|
"logps/chosen": -232.0874481201172, |
|
"logps/rejected": -265.0681457519531, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01028988417237997, |
|
"rewards/margins": 0.015653502196073532, |
|
"rewards/rejected": -0.005363619886338711, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 7.433236173747129, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -3.065229892730713, |
|
"logits/rejected": -2.972902774810791, |
|
"logps/chosen": -295.44805908203125, |
|
"logps/rejected": -269.96026611328125, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.028342243283987045, |
|
"rewards/margins": 0.017250288277864456, |
|
"rewards/rejected": -0.0455925352871418, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09942438513867086, |
|
"grad_norm": 9.490780191701429, |
|
"learning_rate": 4.947916666666667e-07, |
|
"logits/chosen": -3.0910661220550537, |
|
"logits/rejected": -3.00136137008667, |
|
"logps/chosen": -304.7739562988281, |
|
"logps/rejected": -260.3194274902344, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0017203543102368712, |
|
"rewards/margins": 0.03843419998884201, |
|
"rewards/rejected": -0.036713846027851105, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 8.139099488973592, |
|
"learning_rate": 4.999732803821339e-07, |
|
"logits/chosen": -2.9885010719299316, |
|
"logits/rejected": -2.911945104598999, |
|
"logps/chosen": -274.64801025390625, |
|
"logps/rejected": -299.32476806640625, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.015315435826778412, |
|
"rewards/margins": 0.0384557843208313, |
|
"rewards/rejected": -0.05377122014760971, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 8.597620117357772, |
|
"learning_rate": 4.998647417232375e-07, |
|
"logits/chosen": -3.049499750137329, |
|
"logits/rejected": -2.9921631813049316, |
|
"logps/chosen": -214.9530487060547, |
|
"logps/rejected": -198.6853485107422, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.011373547837138176, |
|
"rewards/margins": 0.011143224313855171, |
|
"rewards/rejected": -0.022516775876283646, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 7.814974338489718, |
|
"learning_rate": 4.996727502703357e-07, |
|
"logits/chosen": -3.0792040824890137, |
|
"logits/rejected": -3.0471749305725098, |
|
"logps/chosen": -280.1625671386719, |
|
"logps/rejected": -250.3578643798828, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.013256365433335304, |
|
"rewards/margins": 0.07305942475795746, |
|
"rewards/rejected": -0.0598030686378479, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12035583464154893, |
|
"grad_norm": 8.021422865019257, |
|
"learning_rate": 4.993973701470142e-07, |
|
"logits/chosen": -3.0776336193084717, |
|
"logits/rejected": -3.065192461013794, |
|
"logps/chosen": -246.03701782226562, |
|
"logps/rejected": -330.24395751953125, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.004228830337524414, |
|
"rewards/margins": 0.06871353834867477, |
|
"rewards/rejected": -0.07294236868619919, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 8.830102325688348, |
|
"learning_rate": 4.990386933279972e-07, |
|
"logits/chosen": -3.057614326477051, |
|
"logits/rejected": -2.985898971557617, |
|
"logps/chosen": -237.7249755859375, |
|
"logps/rejected": -227.34384155273438, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05727599188685417, |
|
"rewards/margins": 0.04271426051855087, |
|
"rewards/rejected": -0.09999025613069534, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13082155939298795, |
|
"grad_norm": 9.25379873829916, |
|
"learning_rate": 4.985968396084284e-07, |
|
"logits/chosen": -2.9885993003845215, |
|
"logits/rejected": -2.986743688583374, |
|
"logps/chosen": -295.55853271484375, |
|
"logps/rejected": -264.4269104003906, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.047038041055202484, |
|
"rewards/margins": 0.10233273357152939, |
|
"rewards/rejected": -0.14937077462673187, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 10.58421450887609, |
|
"learning_rate": 4.98071956563861e-07, |
|
"logits/chosen": -3.092935085296631, |
|
"logits/rejected": -3.026994466781616, |
|
"logps/chosen": -293.06158447265625, |
|
"logps/rejected": -282.7720642089844, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.01732378825545311, |
|
"rewards/margins": 0.11920974403619766, |
|
"rewards/rejected": -0.13653352856636047, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 7.811018005032728, |
|
"learning_rate": 4.97464219500968e-07, |
|
"logits/chosen": -3.0531859397888184, |
|
"logits/rejected": -2.9968655109405518, |
|
"logps/chosen": -283.08465576171875, |
|
"logps/rejected": -289.63763427734375, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10270702838897705, |
|
"rewards/margins": 0.05593450739979744, |
|
"rewards/rejected": -0.1586415320634842, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 9.605514735660224, |
|
"learning_rate": 4.967738313989918e-07, |
|
"logits/chosen": -3.0212960243225098, |
|
"logits/rejected": -3.0366883277893066, |
|
"logps/chosen": -314.5849914550781, |
|
"logps/rejected": -308.0143127441406, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0773221030831337, |
|
"rewards/margins": 0.13322624564170837, |
|
"rewards/rejected": -0.21054835617542267, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15175300889586604, |
|
"grad_norm": 8.389574929032044, |
|
"learning_rate": 4.960010228419499e-07, |
|
"logits/chosen": -3.1096034049987793, |
|
"logits/rejected": -3.0065712928771973, |
|
"logps/chosen": -336.0714416503906, |
|
"logps/rejected": -258.11334228515625, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12762203812599182, |
|
"rewards/margins": 0.07918674498796463, |
|
"rewards/rejected": -0.20680880546569824, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 8.7670335055176, |
|
"learning_rate": 4.951460519416227e-07, |
|
"logits/chosen": -3.085927963256836, |
|
"logits/rejected": -3.0476162433624268, |
|
"logps/chosen": -332.5134582519531, |
|
"logps/rejected": -279.91424560546875, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.18624143302440643, |
|
"rewards/margins": 0.0757715106010437, |
|
"rewards/rejected": -0.26201292872428894, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16221873364730507, |
|
"grad_norm": 8.663994541963719, |
|
"learning_rate": 4.942092042513458e-07, |
|
"logits/chosen": -3.14369535446167, |
|
"logits/rejected": -3.0388572216033936, |
|
"logps/chosen": -326.9494934082031, |
|
"logps/rejected": -318.8394470214844, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05960199981927872, |
|
"rewards/margins": 0.17124859988689423, |
|
"rewards/rejected": -0.23085062205791473, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 10.214098303939142, |
|
"learning_rate": 4.931907926706373e-07, |
|
"logits/chosen": -3.1341705322265625, |
|
"logits/rejected": -3.0006356239318848, |
|
"logps/chosen": -352.4683837890625, |
|
"logps/rejected": -254.84872436523438, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.207993745803833, |
|
"rewards/margins": 0.12400402128696442, |
|
"rewards/rejected": -0.3319977819919586, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 11.9926488132874, |
|
"learning_rate": 4.920911573406924e-07, |
|
"logits/chosen": -2.988920211791992, |
|
"logits/rejected": -2.8621726036071777, |
|
"logps/chosen": -270.1966247558594, |
|
"logps/rejected": -231.31307983398438, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.17856433987617493, |
|
"rewards/margins": 0.1818085014820099, |
|
"rewards/rejected": -0.3603728413581848, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 11.181487800996836, |
|
"learning_rate": 4.909106655307787e-07, |
|
"logits/chosen": -3.071873188018799, |
|
"logits/rejected": -3.073513984680176, |
|
"logps/chosen": -300.5464172363281, |
|
"logps/rejected": -336.34735107421875, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2116507738828659, |
|
"rewards/margins": 0.15836475789546967, |
|
"rewards/rejected": -0.37001553177833557, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 11.847042665159117, |
|
"learning_rate": 4.896497115155709e-07, |
|
"logits/chosen": -2.988060235977173, |
|
"logits/rejected": -3.0429558753967285, |
|
"logps/chosen": -211.3376007080078, |
|
"logps/rejected": -275.08087158203125, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2585861086845398, |
|
"rewards/margins": 0.3116615116596222, |
|
"rewards/rejected": -0.5702476501464844, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 10.78945471485412, |
|
"learning_rate": 4.883087164434672e-07, |
|
"logits/chosen": -3.053856372833252, |
|
"logits/rejected": -2.9947004318237305, |
|
"logps/chosen": -300.0679931640625, |
|
"logps/rejected": -320.3852233886719, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.35630059242248535, |
|
"rewards/margins": 0.1511966437101364, |
|
"rewards/rejected": -0.5074971914291382, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1936159079016222, |
|
"grad_norm": 12.222844062228763, |
|
"learning_rate": 4.868881281959282e-07, |
|
"logits/chosen": -3.011864423751831, |
|
"logits/rejected": -2.965883255004883, |
|
"logps/chosen": -337.5576171875, |
|
"logps/rejected": -347.80572509765625, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4423811435699463, |
|
"rewards/margins": 0.4099253714084625, |
|
"rewards/rejected": -0.8523064851760864, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 13.768587447328201, |
|
"learning_rate": 4.853884212378889e-07, |
|
"logits/chosen": -2.873631715774536, |
|
"logits/rejected": -2.939685344696045, |
|
"logps/chosen": -235.6674346923828, |
|
"logps/rejected": -387.4900817871094, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.33242642879486084, |
|
"rewards/margins": 0.29951974749565125, |
|
"rewards/rejected": -0.6319462060928345, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 18.448504645275186, |
|
"learning_rate": 4.838100964592904e-07, |
|
"logits/chosen": -3.0143513679504395, |
|
"logits/rejected": -2.915879726409912, |
|
"logps/chosen": -423.55926513671875, |
|
"logps/rejected": -330.427490234375, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.36892321705818176, |
|
"rewards/margins": 0.2754189372062683, |
|
"rewards/rejected": -0.6443422436714172, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 13.04561395514187, |
|
"learning_rate": 4.821536810077878e-07, |
|
"logits/chosen": -3.05737042427063, |
|
"logits/rejected": -2.9584059715270996, |
|
"logps/chosen": -340.3260192871094, |
|
"logps/rejected": -336.16961669921875, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5146622061729431, |
|
"rewards/margins": 0.4159639775753021, |
|
"rewards/rejected": -0.9306262731552124, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21454735740450026, |
|
"grad_norm": 12.961227614879007, |
|
"learning_rate": 4.804197281126862e-07, |
|
"logits/chosen": -2.953254222869873, |
|
"logits/rejected": -2.9386799335479736, |
|
"logps/chosen": -332.95013427734375, |
|
"logps/rejected": -379.0150146484375, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5353738069534302, |
|
"rewards/margins": 0.16674116253852844, |
|
"rewards/rejected": -0.7021149396896362, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 13.525221402164235, |
|
"learning_rate": 4.786088169001671e-07, |
|
"logits/chosen": -3.0310072898864746, |
|
"logits/rejected": -3.0133447647094727, |
|
"logps/chosen": -328.05743408203125, |
|
"logps/rejected": -359.2528076171875, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5046383738517761, |
|
"rewards/margins": 0.18837007880210876, |
|
"rewards/rejected": -0.6930084824562073, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2250130821559393, |
|
"grad_norm": 15.343151149244173, |
|
"learning_rate": 4.767215521998648e-07, |
|
"logits/chosen": -3.097900867462158, |
|
"logits/rejected": -2.9799551963806152, |
|
"logps/chosen": -344.5359802246094, |
|
"logps/rejected": -340.30279541015625, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3433271050453186, |
|
"rewards/margins": 0.6302416324615479, |
|
"rewards/rejected": -0.9735687971115112, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 15.463582386840484, |
|
"learning_rate": 4.7475856434285853e-07, |
|
"logits/chosen": -3.0013861656188965, |
|
"logits/rejected": -2.9684953689575195, |
|
"logps/chosen": -315.1118469238281, |
|
"logps/rejected": -319.0517578125, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44304031133651733, |
|
"rewards/margins": 0.31362444162368774, |
|
"rewards/rejected": -0.7566647529602051, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 18.270478359234886, |
|
"learning_rate": 4.727205089511466e-07, |
|
"logits/chosen": -2.8321094512939453, |
|
"logits/rejected": -2.848545551300049, |
|
"logps/chosen": -289.8956604003906, |
|
"logps/rejected": -334.09063720703125, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5760111808776855, |
|
"rewards/margins": 0.3905490040779114, |
|
"rewards/rejected": -0.9665601849555969, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 18.394272193166348, |
|
"learning_rate": 4.706080667186738e-07, |
|
"logits/chosen": -2.9533510208129883, |
|
"logits/rejected": -2.85048246383667, |
|
"logps/chosen": -339.3458557128906, |
|
"logps/rejected": -362.229736328125, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.527037501335144, |
|
"rewards/margins": 0.513110876083374, |
|
"rewards/rejected": -1.040148377418518, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24594453165881738, |
|
"grad_norm": 14.635877926172551, |
|
"learning_rate": 4.68421943183986e-07, |
|
"logits/chosen": -2.9311420917510986, |
|
"logits/rejected": -2.9059174060821533, |
|
"logps/chosen": -353.56158447265625, |
|
"logps/rejected": -397.67315673828125, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6963423490524292, |
|
"rewards/margins": 0.4823771119117737, |
|
"rewards/rejected": -1.178719401359558, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 15.746195707332934, |
|
"learning_rate": 4.661628684945851e-07, |
|
"logits/chosen": -2.9695117473602295, |
|
"logits/rejected": -2.9841675758361816, |
|
"logps/chosen": -291.04376220703125, |
|
"logps/rejected": -328.80816650390625, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5066032409667969, |
|
"rewards/margins": 0.49630409479141235, |
|
"rewards/rejected": -1.002907156944275, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 13.427286500899548, |
|
"learning_rate": 4.638315971630662e-07, |
|
"logits/chosen": -2.9672865867614746, |
|
"logits/rejected": -2.9439778327941895, |
|
"logps/chosen": -344.151611328125, |
|
"logps/rejected": -350.19305419921875, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.540307343006134, |
|
"rewards/margins": 0.437363862991333, |
|
"rewards/rejected": -0.9776712656021118, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 17.8293277249549, |
|
"learning_rate": 4.6142890781511635e-07, |
|
"logits/chosen": -2.928375720977783, |
|
"logits/rejected": -2.9310758113861084, |
|
"logps/chosen": -301.03839111328125, |
|
"logps/rejected": -364.05670166015625, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.43716445565223694, |
|
"rewards/margins": 0.5820390582084656, |
|
"rewards/rejected": -1.0192034244537354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 13.134882476577088, |
|
"learning_rate": 4.5895560292945996e-07, |
|
"logits/chosen": -3.02789306640625, |
|
"logits/rejected": -2.938778877258301, |
|
"logps/chosen": -346.013427734375, |
|
"logps/rejected": -349.2925109863281, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6063824892044067, |
|
"rewards/margins": 0.4094172418117523, |
|
"rewards/rejected": -1.015799641609192, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 14.111790777809139, |
|
"learning_rate": 4.5641250856983743e-07, |
|
"logits/chosen": -2.993448495864868, |
|
"logits/rejected": -2.9830093383789062, |
|
"logps/chosen": -336.80517578125, |
|
"logps/rejected": -378.89544677734375, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.7271286845207214, |
|
"rewards/margins": 0.24076862633228302, |
|
"rewards/rejected": -0.9678972959518433, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2773417059131345, |
|
"grad_norm": 23.979569462254332, |
|
"learning_rate": 4.5380047410910655e-07, |
|
"logits/chosen": -2.937358856201172, |
|
"logits/rejected": -2.9407734870910645, |
|
"logps/chosen": -383.3623962402344, |
|
"logps/rejected": -362.71099853515625, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.45562949776649475, |
|
"rewards/margins": 0.7254458665847778, |
|
"rewards/rejected": -1.1810753345489502, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 19.701709513451974, |
|
"learning_rate": 4.5112037194555876e-07, |
|
"logits/chosen": -2.910482406616211, |
|
"logits/rejected": -2.8749754428863525, |
|
"logps/chosen": -393.69268798828125, |
|
"logps/rejected": -415.40338134765625, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9999048113822937, |
|
"rewards/margins": 0.4355775713920593, |
|
"rewards/rejected": -1.435482382774353, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28780743066457354, |
|
"grad_norm": 16.547979220851758, |
|
"learning_rate": 4.4837309721154536e-07, |
|
"logits/chosen": -3.030179977416992, |
|
"logits/rejected": -2.925313949584961, |
|
"logps/chosen": -398.4112243652344, |
|
"logps/rejected": -411.71881103515625, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6699625253677368, |
|
"rewards/margins": 0.7033188939094543, |
|
"rewards/rejected": -1.3732813596725464, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 17.970228607671963, |
|
"learning_rate": 4.4555956747451065e-07, |
|
"logits/chosen": -3.0206010341644287, |
|
"logits/rejected": -2.9801762104034424, |
|
"logps/chosen": -325.3530578613281, |
|
"logps/rejected": -367.80718994140625, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3854089379310608, |
|
"rewards/margins": 0.6328403353691101, |
|
"rewards/rejected": -1.018249273300171, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 14.821894805782252, |
|
"learning_rate": 4.426807224305315e-07, |
|
"logits/chosen": -3.058014392852783, |
|
"logits/rejected": -2.9584782123565674, |
|
"logps/chosen": -334.9781494140625, |
|
"logps/rejected": -347.7608337402344, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5377572774887085, |
|
"rewards/margins": 0.4088308811187744, |
|
"rewards/rejected": -0.9465881586074829, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 15.304057814913962, |
|
"learning_rate": 4.397375235904669e-07, |
|
"logits/chosen": -3.0159687995910645, |
|
"logits/rejected": -2.940138339996338, |
|
"logps/chosen": -378.4619445800781, |
|
"logps/rejected": -325.83795166015625, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6433390378952026, |
|
"rewards/margins": 0.4729389250278473, |
|
"rewards/rejected": -1.1162779331207275, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3087388801674516, |
|
"grad_norm": 15.41023592760314, |
|
"learning_rate": 4.3673095395882074e-07, |
|
"logits/chosen": -2.826242446899414, |
|
"logits/rejected": -2.8306374549865723, |
|
"logps/chosen": -297.02642822265625, |
|
"logps/rejected": -344.32562255859375, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8071243166923523, |
|
"rewards/margins": 0.44610705971717834, |
|
"rewards/rejected": -1.2532315254211426, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 19.465252228371234, |
|
"learning_rate": 4.3366201770542687e-07, |
|
"logits/chosen": -2.9092609882354736, |
|
"logits/rejected": -2.910013198852539, |
|
"logps/chosen": -344.72369384765625, |
|
"logps/rejected": -383.1976623535156, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6237624883651733, |
|
"rewards/margins": 0.513608455657959, |
|
"rewards/rejected": -1.1373710632324219, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31920460491889063, |
|
"grad_norm": 20.05320596442739, |
|
"learning_rate": 4.3053173983006395e-07, |
|
"logits/chosen": -2.9518351554870605, |
|
"logits/rejected": -2.872385263442993, |
|
"logps/chosen": -259.81561279296875, |
|
"logps/rejected": -335.95587158203125, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5689873695373535, |
|
"rewards/margins": 0.6198877096176147, |
|
"rewards/rejected": -1.1888750791549683, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 15.709917499515434, |
|
"learning_rate": 4.2734116582011403e-07, |
|
"logits/chosen": -2.9943740367889404, |
|
"logits/rejected": -2.838672161102295, |
|
"logps/chosen": -406.30401611328125, |
|
"logps/rejected": -319.8934020996094, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4886740744113922, |
|
"rewards/margins": 0.6085253953933716, |
|
"rewards/rejected": -1.097199559211731, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 12.984865579984941, |
|
"learning_rate": 4.2409136130137845e-07, |
|
"logits/chosen": -2.9008967876434326, |
|
"logits/rejected": -2.91571044921875, |
|
"logps/chosen": -289.12115478515625, |
|
"logps/rejected": -377.608154296875, |
|
"loss": 0.6095, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6557156443595886, |
|
"rewards/margins": 0.4830680787563324, |
|
"rewards/rejected": -1.1387838125228882, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 19.717030591939775, |
|
"learning_rate": 4.207834116821672e-07, |
|
"logits/chosen": -2.958247661590576, |
|
"logits/rejected": -2.885899066925049, |
|
"logps/chosen": -340.4076232910156, |
|
"logps/rejected": -417.45916748046875, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5420235991477966, |
|
"rewards/margins": 0.7290440797805786, |
|
"rewards/rejected": -1.2710676193237305, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 16.729020989503553, |
|
"learning_rate": 4.174184217907818e-07, |
|
"logits/chosen": -2.9285740852355957, |
|
"logits/rejected": -2.892252206802368, |
|
"logps/chosen": -332.4837951660156, |
|
"logps/rejected": -364.90606689453125, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6785880923271179, |
|
"rewards/margins": 0.5523657202720642, |
|
"rewards/rejected": -1.2309538125991821, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 20.83462510110715, |
|
"learning_rate": 4.1399751550651084e-07, |
|
"logits/chosen": -2.904776096343994, |
|
"logits/rejected": -2.8706138134002686, |
|
"logps/chosen": -321.9764099121094, |
|
"logps/rejected": -347.6967468261719, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8602074384689331, |
|
"rewards/margins": 0.3864290714263916, |
|
"rewards/rejected": -1.2466365098953247, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35060177917320773, |
|
"grad_norm": 16.939482005435558, |
|
"learning_rate": 4.1052183538426426e-07, |
|
"logits/chosen": -2.8574535846710205, |
|
"logits/rejected": -2.8395981788635254, |
|
"logps/chosen": -335.42327880859375, |
|
"logps/rejected": -357.9546813964844, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7099284529685974, |
|
"rewards/margins": 0.5589883923530579, |
|
"rewards/rejected": -1.2689168453216553, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 14.529627338961385, |
|
"learning_rate": 4.0699254227296884e-07, |
|
"logits/chosen": -2.7800791263580322, |
|
"logits/rejected": -2.7351772785186768, |
|
"logps/chosen": -342.0890808105469, |
|
"logps/rejected": -360.97552490234375, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.009031057357788, |
|
"rewards/margins": 0.451460063457489, |
|
"rewards/rejected": -1.4604910612106323, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 15.776547924608785, |
|
"learning_rate": 4.034108149278543e-07, |
|
"logits/chosen": -2.926506996154785, |
|
"logits/rejected": -2.839150905609131, |
|
"logps/chosen": -330.08648681640625, |
|
"logps/rejected": -370.6238708496094, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8743458986282349, |
|
"rewards/margins": 0.5746399164199829, |
|
"rewards/rejected": -1.4489858150482178, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 15.377113085837133, |
|
"learning_rate": 3.9977784961675833e-07, |
|
"logits/chosen": -2.8555071353912354, |
|
"logits/rejected": -2.8349578380584717, |
|
"logps/chosen": -342.14251708984375, |
|
"logps/rejected": -366.9553527832031, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8782415390014648, |
|
"rewards/margins": 0.4512537121772766, |
|
"rewards/rejected": -1.3294951915740967, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3715332286760858, |
|
"grad_norm": 14.215944756798452, |
|
"learning_rate": 3.96094859720583e-07, |
|
"logits/chosen": -2.897223472595215, |
|
"logits/rejected": -2.7988810539245605, |
|
"logps/chosen": -398.7936706542969, |
|
"logps/rejected": -397.49578857421875, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6832660436630249, |
|
"rewards/margins": 0.5724307298660278, |
|
"rewards/rejected": -1.2556967735290527, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 15.259982422665061, |
|
"learning_rate": 3.923630753280357e-07, |
|
"logits/chosen": -2.7229888439178467, |
|
"logits/rejected": -2.7818284034729004, |
|
"logps/chosen": -273.22772216796875, |
|
"logps/rejected": -326.69171142578125, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5913980603218079, |
|
"rewards/margins": 0.4389529228210449, |
|
"rewards/rejected": -1.030350923538208, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3819989534275249, |
|
"grad_norm": 16.540503842805457, |
|
"learning_rate": 3.8858374282478893e-07, |
|
"logits/chosen": -2.847386360168457, |
|
"logits/rejected": -2.822706460952759, |
|
"logps/chosen": -340.0333251953125, |
|
"logps/rejected": -446.2408752441406, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7927729487419128, |
|
"rewards/margins": 0.775786280632019, |
|
"rewards/rejected": -1.5685592889785767, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 14.485113206171722, |
|
"learning_rate": 3.8475812447719823e-07, |
|
"logits/chosen": -2.7510900497436523, |
|
"logits/rejected": -2.770341396331787, |
|
"logps/chosen": -311.25360107421875, |
|
"logps/rejected": -339.53631591796875, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7380820512771606, |
|
"rewards/margins": 0.47503456473350525, |
|
"rewards/rejected": -1.2131164073944092, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 20.826039047439295, |
|
"learning_rate": 3.8088749801071496e-07, |
|
"logits/chosen": -2.784240245819092, |
|
"logits/rejected": -2.7459471225738525, |
|
"logps/chosen": -353.0906677246094, |
|
"logps/rejected": -405.60406494140625, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7327491044998169, |
|
"rewards/margins": 0.3788543939590454, |
|
"rewards/rejected": -1.1116034984588623, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 17.658078906056954, |
|
"learning_rate": 3.7697315618313644e-07, |
|
"logits/chosen": -2.7973134517669678, |
|
"logits/rejected": -2.7560336589813232, |
|
"logps/chosen": -279.4862365722656, |
|
"logps/rejected": -299.8984375, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5168331861495972, |
|
"rewards/margins": 0.5671908855438232, |
|
"rewards/rejected": -1.08402419090271, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40293040293040294, |
|
"grad_norm": 13.415561426957483, |
|
"learning_rate": 3.7301640635283584e-07, |
|
"logits/chosen": -2.7720260620117188, |
|
"logits/rejected": -2.748436450958252, |
|
"logps/chosen": -328.19390869140625, |
|
"logps/rejected": -405.95709228515625, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8876395225524902, |
|
"rewards/margins": 0.42753082513809204, |
|
"rewards/rejected": -1.315170168876648, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 14.769300667177983, |
|
"learning_rate": 3.6901857004211443e-07, |
|
"logits/chosen": -2.749990463256836, |
|
"logits/rejected": -2.7357590198516846, |
|
"logps/chosen": -313.04180908203125, |
|
"logps/rejected": -345.66485595703125, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7682701349258423, |
|
"rewards/margins": 0.6885534524917603, |
|
"rewards/rejected": -1.4568235874176025, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.413396127681842, |
|
"grad_norm": 14.678645322251919, |
|
"learning_rate": 3.6498098249582444e-07, |
|
"logits/chosen": -2.7600743770599365, |
|
"logits/rejected": -2.767582416534424, |
|
"logps/chosen": -304.5693664550781, |
|
"logps/rejected": -396.30731201171875, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.837949275970459, |
|
"rewards/margins": 0.3060819208621979, |
|
"rewards/rejected": -1.144031047821045, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 19.043570676274012, |
|
"learning_rate": 3.6090499223540757e-07, |
|
"logits/chosen": -2.816871166229248, |
|
"logits/rejected": -2.819472551345825, |
|
"logps/chosen": -386.77410888671875, |
|
"logps/rejected": -432.9602966308594, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9629328846931458, |
|
"rewards/margins": 0.41979989409446716, |
|
"rewards/rejected": -1.3827327489852905, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 16.07598371652075, |
|
"learning_rate": 3.5679196060850034e-07, |
|
"logits/chosen": -2.774369478225708, |
|
"logits/rejected": -2.704817771911621, |
|
"logps/chosen": -381.45367431640625, |
|
"logps/rejected": -394.75054931640625, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8025716543197632, |
|
"rewards/margins": 0.5534734725952148, |
|
"rewards/rejected": -1.356045126914978, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 17.575598817541977, |
|
"learning_rate": 3.5264326133425464e-07, |
|
"logits/chosen": -2.808215856552124, |
|
"logits/rejected": -2.7602808475494385, |
|
"logps/chosen": -377.1565856933594, |
|
"logps/rejected": -380.73577880859375, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9608553051948547, |
|
"rewards/margins": 0.5722512006759644, |
|
"rewards/rejected": -1.5331064462661743, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43432757718472004, |
|
"grad_norm": 16.32115598655498, |
|
"learning_rate": 3.4846028004452693e-07, |
|
"logits/chosen": -2.8719420433044434, |
|
"logits/rejected": -2.8173866271972656, |
|
"logps/chosen": -307.88714599609375, |
|
"logps/rejected": -346.9542236328125, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7333047389984131, |
|
"rewards/margins": 0.4502180516719818, |
|
"rewards/rejected": -1.1835228204727173, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 16.91945418937996, |
|
"learning_rate": 3.4424441382108826e-07, |
|
"logits/chosen": -2.9018168449401855, |
|
"logits/rejected": -2.726928472518921, |
|
"logps/chosen": -491.023193359375, |
|
"logps/rejected": -441.81494140625, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9286457896232605, |
|
"rewards/margins": 0.6778661012649536, |
|
"rewards/rejected": -1.6065118312835693, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44479330193615907, |
|
"grad_norm": 21.188197365998956, |
|
"learning_rate": 3.399970707290105e-07, |
|
"logits/chosen": -2.784093141555786, |
|
"logits/rejected": -2.6797289848327637, |
|
"logps/chosen": -347.7838439941406, |
|
"logps/rejected": -357.4574279785156, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9829579591751099, |
|
"rewards/margins": 0.4461473822593689, |
|
"rewards/rejected": -1.4291054010391235, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 16.855672588469375, |
|
"learning_rate": 3.3571966934638376e-07, |
|
"logits/chosen": -2.818727731704712, |
|
"logits/rejected": -2.8157877922058105, |
|
"logps/chosen": -258.5130615234375, |
|
"logps/rejected": -407.8525085449219, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7257243394851685, |
|
"rewards/margins": 0.8329108953475952, |
|
"rewards/rejected": -1.5586349964141846, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 13.734802575828237, |
|
"learning_rate": 3.314136382905234e-07, |
|
"logits/chosen": -2.577549457550049, |
|
"logits/rejected": -2.634438991546631, |
|
"logps/chosen": -296.9052429199219, |
|
"logps/rejected": -361.5945739746094, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.766598641872406, |
|
"rewards/margins": 0.615802526473999, |
|
"rewards/rejected": -1.3824012279510498, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 16.099032009321178, |
|
"learning_rate": 3.270804157408225e-07, |
|
"logits/chosen": -2.836336612701416, |
|
"logits/rejected": -2.819361925125122, |
|
"logps/chosen": -354.25347900390625, |
|
"logps/rejected": -366.732421875, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8732249140739441, |
|
"rewards/margins": 0.35818585753440857, |
|
"rewards/rejected": -1.2314107418060303, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46572475143903713, |
|
"grad_norm": 17.106740036561465, |
|
"learning_rate": 3.227214489584128e-07, |
|
"logits/chosen": -2.8799033164978027, |
|
"logits/rejected": -2.8418822288513184, |
|
"logps/chosen": -386.0233459472656, |
|
"logps/rejected": -369.4678039550781, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7949320077896118, |
|
"rewards/margins": 0.5880209803581238, |
|
"rewards/rejected": -1.3829529285430908, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 21.890320275591346, |
|
"learning_rate": 3.1833819380279023e-07, |
|
"logits/chosen": -2.721087694168091, |
|
"logits/rejected": -2.697716474533081, |
|
"logps/chosen": -314.33087158203125, |
|
"logps/rejected": -464.30413818359375, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7493630051612854, |
|
"rewards/margins": 1.0829684734344482, |
|
"rewards/rejected": -1.8323314189910889, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 18.614163560193642, |
|
"learning_rate": 3.139321142455703e-07, |
|
"logits/chosen": -2.7572057247161865, |
|
"logits/rejected": -2.706200122833252, |
|
"logps/chosen": -257.91607666015625, |
|
"logps/rejected": -347.5008239746094, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8465842008590698, |
|
"rewards/margins": 0.8367371559143066, |
|
"rewards/rejected": -1.683321237564087, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 15.269349106291124, |
|
"learning_rate": 3.095046818815331e-07, |
|
"logits/chosen": -2.8949315547943115, |
|
"logits/rejected": -2.79899525642395, |
|
"logps/chosen": -398.53765869140625, |
|
"logps/rejected": -398.479736328125, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9853115081787109, |
|
"rewards/margins": 0.5784602761268616, |
|
"rewards/rejected": -1.5637718439102173, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 15.355520412605172, |
|
"learning_rate": 3.0505737543712275e-07, |
|
"logits/chosen": -2.806957960128784, |
|
"logits/rejected": -2.785641670227051, |
|
"logps/chosen": -353.44854736328125, |
|
"logps/rejected": -384.927490234375, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9327161908149719, |
|
"rewards/margins": 0.3711306154727936, |
|
"rewards/rejected": -1.303847074508667, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 15.67157273698381, |
|
"learning_rate": 3.0059168027656475e-07, |
|
"logits/chosen": -2.888259172439575, |
|
"logits/rejected": -2.8196072578430176, |
|
"logps/chosen": -374.23443603515625, |
|
"logps/rejected": -388.24578857421875, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8297722935676575, |
|
"rewards/margins": 0.6955471038818359, |
|
"rewards/rejected": -1.5253194570541382, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4971219256933543, |
|
"grad_norm": 14.462678359824464, |
|
"learning_rate": 2.9610908790576663e-07, |
|
"logits/chosen": -2.7698135375976562, |
|
"logits/rejected": -2.661236524581909, |
|
"logps/chosen": -364.80810546875, |
|
"logps/rejected": -443.79425048828125, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7954778671264648, |
|
"rewards/margins": 0.9584125280380249, |
|
"rewards/rejected": -1.7538903951644897, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 13.595756965461069, |
|
"learning_rate": 2.9161109547416667e-07, |
|
"logits/chosen": -2.8679168224334717, |
|
"logits/rejected": -2.795522689819336, |
|
"logps/chosen": -391.12115478515625, |
|
"logps/rejected": -440.32940673828125, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1392987966537476, |
|
"rewards/margins": 0.6391605734825134, |
|
"rewards/rejected": -1.7784591913223267, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5075876504447933, |
|
"grad_norm": 17.796999860342627, |
|
"learning_rate": 2.8709920527469834e-07, |
|
"logits/chosen": -2.7307040691375732, |
|
"logits/rejected": -2.691157341003418, |
|
"logps/chosen": -373.8775939941406, |
|
"logps/rejected": -425.29400634765625, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8613603711128235, |
|
"rewards/margins": 0.9478839039802551, |
|
"rewards/rejected": -1.80924391746521, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 11.72784157089814, |
|
"learning_rate": 2.8257492424203685e-07, |
|
"logits/chosen": -2.8900365829467773, |
|
"logits/rejected": -2.731393337249756, |
|
"logps/chosen": -380.15576171875, |
|
"logps/rejected": -384.27630615234375, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7285529971122742, |
|
"rewards/margins": 0.7510075569152832, |
|
"rewards/rejected": -1.4795606136322021, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 17.896633281401765, |
|
"learning_rate": 2.780397634492949e-07, |
|
"logits/chosen": -2.7340455055236816, |
|
"logits/rejected": -2.663761615753174, |
|
"logps/chosen": -312.6453552246094, |
|
"logps/rejected": -369.53790283203125, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.68805330991745, |
|
"rewards/margins": 0.8142625093460083, |
|
"rewards/rejected": -1.5023157596588135, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 18.34859064941708, |
|
"learning_rate": 2.7349523760333674e-07, |
|
"logits/chosen": -2.7495155334472656, |
|
"logits/rejected": -2.6967289447784424, |
|
"logps/chosen": -327.76214599609375, |
|
"logps/rejected": -376.7933044433594, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9751359820365906, |
|
"rewards/margins": 0.5826241970062256, |
|
"rewards/rejected": -1.55776047706604, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5285190999476713, |
|
"grad_norm": 17.334821505661097, |
|
"learning_rate": 2.6894286453887827e-07, |
|
"logits/chosen": -2.7708683013916016, |
|
"logits/rejected": -2.763948440551758, |
|
"logps/chosen": -315.4136047363281, |
|
"logps/rejected": -405.6109924316406, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.835088849067688, |
|
"rewards/margins": 0.722664475440979, |
|
"rewards/rejected": -1.5577532052993774, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 16.461462996228413, |
|
"learning_rate": 2.6438416471154273e-07, |
|
"logits/chosen": -2.8052124977111816, |
|
"logits/rejected": -2.7714879512786865, |
|
"logps/chosen": -361.3446044921875, |
|
"logps/rejected": -400.1153564453125, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7003945112228394, |
|
"rewards/margins": 0.8514345288276672, |
|
"rewards/rejected": -1.5518289804458618, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5389848246991105, |
|
"grad_norm": 16.993297156547936, |
|
"learning_rate": 2.598206606900406e-07, |
|
"logits/chosen": -2.812356472015381, |
|
"logits/rejected": -2.779200553894043, |
|
"logps/chosen": -351.882568359375, |
|
"logps/rejected": -349.3376159667969, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9720566868782043, |
|
"rewards/margins": 0.37396326661109924, |
|
"rewards/rejected": -1.3460201025009155, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 16.369305843640827, |
|
"learning_rate": 2.552538766476443e-07, |
|
"logits/chosen": -2.807950973510742, |
|
"logits/rejected": -2.8600311279296875, |
|
"logps/chosen": -336.7136535644531, |
|
"logps/rejected": -404.81646728515625, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8792396783828735, |
|
"rewards/margins": 0.5810363292694092, |
|
"rewards/rejected": -1.4602760076522827, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 18.03446272194485, |
|
"learning_rate": 2.5068533785312666e-07, |
|
"logits/chosen": -2.873033046722412, |
|
"logits/rejected": -2.799868583679199, |
|
"logps/chosen": -392.97955322265625, |
|
"logps/rejected": -442.9043884277344, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6563512086868286, |
|
"rewards/margins": 0.8989084959030151, |
|
"rewards/rejected": -1.5552598237991333, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 20.315404295924612, |
|
"learning_rate": 2.461165701613333e-07, |
|
"logits/chosen": -2.759457588195801, |
|
"logits/rejected": -2.7653160095214844, |
|
"logps/chosen": -315.7008056640625, |
|
"logps/rejected": -420.513671875, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5269359946250916, |
|
"rewards/margins": 1.0427325963974, |
|
"rewards/rejected": -1.5696685314178467, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5599162742019885, |
|
"grad_norm": 14.713699575864963, |
|
"learning_rate": 2.415490995035596e-07, |
|
"logits/chosen": -2.7564265727996826, |
|
"logits/rejected": -2.7757070064544678, |
|
"logps/chosen": -408.191162109375, |
|
"logps/rejected": -409.313232421875, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8357181549072266, |
|
"rewards/margins": 0.5712189674377441, |
|
"rewards/rejected": -1.4069370031356812, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 18.09014309365496, |
|
"learning_rate": 2.3698445137790258e-07, |
|
"logits/chosen": -2.8548378944396973, |
|
"logits/rejected": -2.7975635528564453, |
|
"logps/chosen": -307.1778869628906, |
|
"logps/rejected": -374.8013610839844, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7622523307800293, |
|
"rewards/margins": 0.7371785640716553, |
|
"rewards/rejected": -1.4994310140609741, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5703819989534276, |
|
"grad_norm": 24.666086366933353, |
|
"learning_rate": 2.3242415033975575e-07, |
|
"logits/chosen": -2.7526750564575195, |
|
"logits/rejected": -2.6710307598114014, |
|
"logps/chosen": -406.1108093261719, |
|
"logps/rejected": -334.859375, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0330530405044556, |
|
"rewards/margins": 0.43497997522354126, |
|
"rewards/rejected": -1.4680330753326416, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 19.70750428042825, |
|
"learning_rate": 2.2786971949262134e-07, |
|
"logits/chosen": -2.7635788917541504, |
|
"logits/rejected": -2.740485668182373, |
|
"logps/chosen": -350.9692077636719, |
|
"logps/rejected": -427.8968811035156, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9213571548461914, |
|
"rewards/margins": 0.6530935764312744, |
|
"rewards/rejected": -1.5744506120681763, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 19.421047147213937, |
|
"learning_rate": 2.2332267997940513e-07, |
|
"logits/chosen": -2.593143939971924, |
|
"logits/rejected": -2.5872962474823, |
|
"logps/chosen": -264.0029296875, |
|
"logps/rejected": -365.4161682128906, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9193994402885437, |
|
"rewards/margins": 0.7727819681167603, |
|
"rewards/rejected": -1.6921813488006592, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 20.25715075172959, |
|
"learning_rate": 2.1878455047436753e-07, |
|
"logits/chosen": -2.7330455780029297, |
|
"logits/rejected": -2.701078176498413, |
|
"logps/chosen": -377.7864074707031, |
|
"logps/rejected": -417.6946716308594, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0114750862121582, |
|
"rewards/margins": 0.6698529124259949, |
|
"rewards/rejected": -1.6813280582427979, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5913134484563056, |
|
"grad_norm": 18.65929450263775, |
|
"learning_rate": 2.1425684667589852e-07, |
|
"logits/chosen": -2.640451669692993, |
|
"logits/rejected": -2.5956547260284424, |
|
"logps/chosen": -344.3413391113281, |
|
"logps/rejected": -445.24627685546875, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2718535661697388, |
|
"rewards/margins": 0.5820540189743042, |
|
"rewards/rejected": -1.8539073467254639, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 23.91921863916153, |
|
"learning_rate": 2.0974108080028692e-07, |
|
"logits/chosen": -2.876276969909668, |
|
"logits/rejected": -2.8425920009613037, |
|
"logps/chosen": -358.00347900390625, |
|
"logps/rejected": -435.3857421875, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0057141780853271, |
|
"rewards/margins": 0.6711603403091431, |
|
"rewards/rejected": -1.6768745183944702, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6017791732077447, |
|
"grad_norm": 18.381841775077824, |
|
"learning_rate": 2.0523876107665194e-07, |
|
"logits/chosen": -2.829072952270508, |
|
"logits/rejected": -2.6755383014678955, |
|
"logps/chosen": -359.2571105957031, |
|
"logps/rejected": -397.68743896484375, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9120863080024719, |
|
"rewards/margins": 0.7489384412765503, |
|
"rewards/rejected": -1.6610246896743774, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 20.516414043729537, |
|
"learning_rate": 2.0075139124320787e-07, |
|
"logits/chosen": -2.6632769107818604, |
|
"logits/rejected": -2.6845157146453857, |
|
"logps/chosen": -317.9290771484375, |
|
"logps/rejected": -327.5867614746094, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0191724300384521, |
|
"rewards/margins": 0.4334065020084381, |
|
"rewards/rejected": -1.452579140663147, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 18.076421208226538, |
|
"learning_rate": 1.962804700450265e-07, |
|
"logits/chosen": -2.7555832862854004, |
|
"logits/rejected": -2.7026753425598145, |
|
"logps/chosen": -358.18212890625, |
|
"logps/rejected": -462.0071716308594, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.876019299030304, |
|
"rewards/margins": 1.0211801528930664, |
|
"rewards/rejected": -1.8971996307373047, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 16.308484308025697, |
|
"learning_rate": 1.9182749073346943e-07, |
|
"logits/chosen": -2.806734085083008, |
|
"logits/rejected": -2.7705514430999756, |
|
"logps/chosen": -416.02520751953125, |
|
"logps/rejected": -418.12347412109375, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0437430143356323, |
|
"rewards/margins": 0.47317224740982056, |
|
"rewards/rejected": -1.516915202140808, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6227106227106227, |
|
"grad_norm": 19.5121441695378, |
|
"learning_rate": 1.8739394056745372e-07, |
|
"logits/chosen": -2.85368013381958, |
|
"logits/rejected": -2.792527675628662, |
|
"logps/chosen": -444.78948974609375, |
|
"logps/rejected": -417.74078369140625, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.825792670249939, |
|
"rewards/margins": 0.635454535484314, |
|
"rewards/rejected": -1.461247205734253, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 16.627989570364402, |
|
"learning_rate": 1.8298130031671972e-07, |
|
"logits/chosen": -2.568850517272949, |
|
"logits/rejected": -2.4875073432922363, |
|
"logps/chosen": -408.0639953613281, |
|
"logps/rejected": -431.19329833984375, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0868682861328125, |
|
"rewards/margins": 0.5747972726821899, |
|
"rewards/rejected": -1.6616655588150024, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6331763474620618, |
|
"grad_norm": 20.140246761449887, |
|
"learning_rate": 1.785910437672658e-07, |
|
"logits/chosen": -2.8672873973846436, |
|
"logits/rejected": -2.822535514831543, |
|
"logps/chosen": -386.6443786621094, |
|
"logps/rejected": -416.11328125, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.953647792339325, |
|
"rewards/margins": 0.6560716032981873, |
|
"rewards/rejected": -1.6097195148468018, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 20.20724844042649, |
|
"learning_rate": 1.7422463722911624e-07, |
|
"logits/chosen": -2.8543591499328613, |
|
"logits/rejected": -2.8314263820648193, |
|
"logps/chosen": -403.2071228027344, |
|
"logps/rejected": -456.78924560546875, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.862319827079773, |
|
"rewards/margins": 0.9367027282714844, |
|
"rewards/rejected": -1.7990226745605469, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 17.62902811045846, |
|
"learning_rate": 1.6988353904658492e-07, |
|
"logits/chosen": -2.796889543533325, |
|
"logits/rejected": -2.7177813053131104, |
|
"logps/chosen": -430.34515380859375, |
|
"logps/rejected": -412.7137145996094, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9104453921318054, |
|
"rewards/margins": 0.9096380472183228, |
|
"rewards/rejected": -1.8200836181640625, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 20.291430758326484, |
|
"learning_rate": 1.6556919911120081e-07, |
|
"logits/chosen": -2.7235121726989746, |
|
"logits/rejected": -2.704380512237549, |
|
"logps/chosen": -316.89495849609375, |
|
"logps/rejected": -354.3640441894531, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8046097755432129, |
|
"rewards/margins": 0.6576355695724487, |
|
"rewards/rejected": -1.4622454643249512, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6541077969649398, |
|
"grad_norm": 16.634847998265094, |
|
"learning_rate": 1.6128305837745546e-07, |
|
"logits/chosen": -2.8713958263397217, |
|
"logits/rejected": -2.780726909637451, |
|
"logps/chosen": -357.17352294921875, |
|
"logps/rejected": -450.5567321777344, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8796365857124329, |
|
"rewards/margins": 0.8217870593070984, |
|
"rewards/rejected": -1.7014236450195312, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 13.440845355698691, |
|
"learning_rate": 1.570265483815364e-07, |
|
"logits/chosen": -2.7988827228546143, |
|
"logits/rejected": -2.7722063064575195, |
|
"logps/chosen": -336.47259521484375, |
|
"logps/rejected": -320.25933837890625, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7505493760108948, |
|
"rewards/margins": 0.728230357170105, |
|
"rewards/rejected": -1.4787797927856445, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6645735217163788, |
|
"grad_norm": 17.012464455361766, |
|
"learning_rate": 1.5280109076320506e-07, |
|
"logits/chosen": -2.7736287117004395, |
|
"logits/rejected": -2.7175097465515137, |
|
"logps/chosen": -305.7979736328125, |
|
"logps/rejected": -366.69677734375, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.756097137928009, |
|
"rewards/margins": 0.8465806841850281, |
|
"rewards/rejected": -1.6026777029037476, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 20.946223504777155, |
|
"learning_rate": 1.4860809679098158e-07, |
|
"logits/chosen": -2.7644202709198, |
|
"logits/rejected": -2.674136161804199, |
|
"logps/chosen": -328.1389465332031, |
|
"logps/rejected": -375.33331298828125, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8715450167655945, |
|
"rewards/margins": 0.7798687219619751, |
|
"rewards/rejected": -1.6514136791229248, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 16.38862564822018, |
|
"learning_rate": 1.444489668907914e-07, |
|
"logits/chosen": -2.688934087753296, |
|
"logits/rejected": -2.6829075813293457, |
|
"logps/chosen": -314.0862121582031, |
|
"logps/rejected": -436.2018127441406, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7365253567695618, |
|
"rewards/margins": 0.8902775645256042, |
|
"rewards/rejected": -1.6268030405044556, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 18.868265740047633, |
|
"learning_rate": 1.403250901782354e-07, |
|
"logits/chosen": -2.7281813621520996, |
|
"logits/rejected": -2.748370409011841, |
|
"logps/chosen": -362.42266845703125, |
|
"logps/rejected": -434.9849548339844, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9654933214187622, |
|
"rewards/margins": 0.6172199249267578, |
|
"rewards/rejected": -1.58271324634552, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6855049712192569, |
|
"grad_norm": 22.766524494989497, |
|
"learning_rate": 1.3623784399463584e-07, |
|
"logits/chosen": -2.8341379165649414, |
|
"logits/rejected": -2.8009238243103027, |
|
"logps/chosen": -321.3329772949219, |
|
"logps/rejected": -359.7023010253906, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7877645492553711, |
|
"rewards/margins": 0.7746297717094421, |
|
"rewards/rejected": -1.562394380569458, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 18.14832280420879, |
|
"learning_rate": 1.3218859344701632e-07, |
|
"logits/chosen": -2.7510125637054443, |
|
"logits/rejected": -2.7323222160339355, |
|
"logps/chosen": -392.82965087890625, |
|
"logps/rejected": -459.291015625, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0846540927886963, |
|
"rewards/margins": 0.6299933195114136, |
|
"rewards/rejected": -1.7146475315093994, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 22.852759896856153, |
|
"learning_rate": 1.2817869095216624e-07, |
|
"logits/chosen": -2.7460663318634033, |
|
"logits/rejected": -2.7363333702087402, |
|
"logps/chosen": -344.58636474609375, |
|
"logps/rejected": -462.2234802246094, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7783873677253723, |
|
"rewards/margins": 0.8798893094062805, |
|
"rewards/rejected": -1.6582765579223633, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 23.6570903287471, |
|
"learning_rate": 1.2420947578494522e-07, |
|
"logits/chosen": -2.689542293548584, |
|
"logits/rejected": -2.6176483631134033, |
|
"logps/chosen": -351.36468505859375, |
|
"logps/rejected": -383.5470275878906, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9131054878234863, |
|
"rewards/margins": 0.8674660921096802, |
|
"rewards/rejected": -1.7805715799331665, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 24.783115737011485, |
|
"learning_rate": 1.202822736309758e-07, |
|
"logits/chosen": -2.7429165840148926, |
|
"logits/rejected": -2.718522787094116, |
|
"logps/chosen": -374.50323486328125, |
|
"logps/rejected": -395.9026794433594, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9068658947944641, |
|
"rewards/margins": 0.5722583532333374, |
|
"rewards/rejected": -1.4791243076324463, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 16.39264775799894, |
|
"learning_rate": 1.1639839614387572e-07, |
|
"logits/chosen": -2.6672797203063965, |
|
"logits/rejected": -2.646698474884033, |
|
"logps/chosen": -431.44305419921875, |
|
"logps/rejected": -459.53387451171875, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0058305263519287, |
|
"rewards/margins": 0.6214712262153625, |
|
"rewards/rejected": -1.627301573753357, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.716902145473574, |
|
"grad_norm": 21.609096898964978, |
|
"learning_rate": 1.1255914050717552e-07, |
|
"logits/chosen": -2.791228771209717, |
|
"logits/rejected": -2.6643431186676025, |
|
"logps/chosen": -397.3729553222656, |
|
"logps/rejected": -365.84893798828125, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8865281343460083, |
|
"rewards/margins": 0.6994263529777527, |
|
"rewards/rejected": -1.5859544277191162, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 18.052304623231368, |
|
"learning_rate": 1.0876578900107053e-07, |
|
"logits/chosen": -2.771291494369507, |
|
"logits/rejected": -2.7225091457366943, |
|
"logps/chosen": -280.9075622558594, |
|
"logps/rejected": -402.49908447265625, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6819049119949341, |
|
"rewards/margins": 0.9486406445503235, |
|
"rewards/rejected": -1.6305453777313232, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.727367870225013, |
|
"grad_norm": 19.948472450962296, |
|
"learning_rate": 1.050196085741491e-07, |
|
"logits/chosen": -2.708111047744751, |
|
"logits/rejected": -2.616854667663574, |
|
"logps/chosen": -350.962158203125, |
|
"logps/rejected": -393.85357666015625, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9579183459281921, |
|
"rewards/margins": 0.8560064435005188, |
|
"rewards/rejected": -1.81392502784729, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 19.160243750600223, |
|
"learning_rate": 1.0132185042024246e-07, |
|
"logits/chosen": -2.6855554580688477, |
|
"logits/rejected": -2.6964261531829834, |
|
"logps/chosen": -345.172119140625, |
|
"logps/rejected": -429.0968322753906, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.147552728652954, |
|
"rewards/margins": 0.7010320425033569, |
|
"rewards/rejected": -1.848584532737732, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 20.042088035388982, |
|
"learning_rate": 9.767374956053584e-08, |
|
"logits/chosen": -2.6926827430725098, |
|
"logits/rejected": -2.65360689163208, |
|
"logps/chosen": -353.872314453125, |
|
"logps/rejected": -421.33447265625, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.064784049987793, |
|
"rewards/margins": 0.7993738055229187, |
|
"rewards/rejected": -1.864158034324646, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 21.422151300583028, |
|
"learning_rate": 9.407652443108192e-08, |
|
"logits/chosen": -2.754948616027832, |
|
"logits/rejected": -2.6873762607574463, |
|
"logps/chosen": -410.3388671875, |
|
"logps/rejected": -412.1189880371094, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0343502759933472, |
|
"rewards/margins": 0.6344264149665833, |
|
"rewards/rejected": -1.668776512145996, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 15.10072418425115, |
|
"learning_rate": 9.053137647585229e-08, |
|
"logits/chosen": -2.7673416137695312, |
|
"logits/rejected": -2.683150053024292, |
|
"logps/chosen": -380.3271179199219, |
|
"logps/rejected": -419.106201171875, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8902686834335327, |
|
"rewards/margins": 0.8359274864196777, |
|
"rewards/rejected": -1.7261962890625, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 20.007719817569754, |
|
"learning_rate": 8.70394897454659e-08, |
|
"logits/chosen": -2.7903285026550293, |
|
"logits/rejected": -2.726536750793457, |
|
"logps/chosen": -412.11669921875, |
|
"logps/rejected": -453.485107421875, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6117042899131775, |
|
"rewards/margins": 0.9241981506347656, |
|
"rewards/rejected": -1.5359022617340088, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7587650444793302, |
|
"grad_norm": 15.725739369442051, |
|
"learning_rate": 8.360203050172488e-08, |
|
"logits/chosen": -2.761046886444092, |
|
"logits/rejected": -2.6572835445404053, |
|
"logps/chosen": -380.8763427734375, |
|
"logps/rejected": -428.72601318359375, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9222623705863953, |
|
"rewards/margins": 0.817249596118927, |
|
"rewards/rejected": -1.7395120859146118, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 15.913216171443693, |
|
"learning_rate": 8.022014682809305e-08, |
|
"logits/chosen": -2.680180072784424, |
|
"logits/rejected": -2.6794424057006836, |
|
"logps/chosen": -297.6555480957031, |
|
"logps/rejected": -357.39703369140625, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8991565704345703, |
|
"rewards/margins": 0.5045825242996216, |
|
"rewards/rejected": -1.4037392139434814, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 16.251635097636957, |
|
"learning_rate": 7.689496824624525e-08, |
|
"logits/chosen": -2.7646737098693848, |
|
"logits/rejected": -2.642939567565918, |
|
"logps/chosen": -400.4357604980469, |
|
"logps/rejected": -399.53228759765625, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7477626204490662, |
|
"rewards/margins": 0.9188524484634399, |
|
"rewards/rejected": -1.6666151285171509, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 18.848669382089422, |
|
"learning_rate": 7.362760533881649e-08, |
|
"logits/chosen": -2.682922124862671, |
|
"logits/rejected": -2.678013324737549, |
|
"logps/chosen": -340.21295166015625, |
|
"logps/rejected": -394.0873107910156, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9958998560905457, |
|
"rewards/margins": 0.7448866367340088, |
|
"rewards/rejected": -1.7407863140106201, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7796964939822083, |
|
"grad_norm": 18.023386643013044, |
|
"learning_rate": 7.041914937847584e-08, |
|
"logits/chosen": -2.489797592163086, |
|
"logits/rejected": -2.509646415710449, |
|
"logps/chosen": -401.4407653808594, |
|
"logps/rejected": -446.206298828125, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2514934539794922, |
|
"rewards/margins": 0.5801483392715454, |
|
"rewards/rejected": -1.8316421508789062, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 16.75491775995985, |
|
"learning_rate": 6.727067196345099e-08, |
|
"logits/chosen": -2.6368460655212402, |
|
"logits/rejected": -2.5974481105804443, |
|
"logps/chosen": -296.484130859375, |
|
"logps/rejected": -450.65826416015625, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9263995289802551, |
|
"rewards/margins": 1.0427716970443726, |
|
"rewards/rejected": -1.9691712856292725, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7901622187336473, |
|
"grad_norm": 19.21477092868735, |
|
"learning_rate": 6.418322465962233e-08, |
|
"logits/chosen": -2.6289784908294678, |
|
"logits/rejected": -2.6378707885742188, |
|
"logps/chosen": -389.85357666015625, |
|
"logps/rejected": -506.17926025390625, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3396786451339722, |
|
"rewards/margins": 0.8516691327095032, |
|
"rewards/rejected": -2.191347599029541, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 18.1853712960554, |
|
"learning_rate": 6.115783864930905e-08, |
|
"logits/chosen": -2.6589932441711426, |
|
"logits/rejected": -2.61903715133667, |
|
"logps/chosen": -309.13134765625, |
|
"logps/rejected": -423.0372619628906, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9086647033691406, |
|
"rewards/margins": 0.8935861587524414, |
|
"rewards/rejected": -1.802250862121582, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 21.592413720973216, |
|
"learning_rate": 5.8195524386862374e-08, |
|
"logits/chosen": -2.820551872253418, |
|
"logits/rejected": -2.7724428176879883, |
|
"logps/chosen": -423.28363037109375, |
|
"logps/rejected": -555.06298828125, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1178683042526245, |
|
"rewards/margins": 0.7847362756729126, |
|
"rewards/rejected": -1.9026046991348267, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 16.018713976629193, |
|
"learning_rate": 5.529727126118228e-08, |
|
"logits/chosen": -2.716298818588257, |
|
"logits/rejected": -2.724658489227295, |
|
"logps/chosen": -468.96856689453125, |
|
"logps/rejected": -467.7854919433594, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1532551050186157, |
|
"rewards/margins": 0.45239201188087463, |
|
"rewards/rejected": -1.605647325515747, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8110936682365254, |
|
"grad_norm": 14.945499187412246, |
|
"learning_rate": 5.246404726526918e-08, |
|
"logits/chosen": -2.677903652191162, |
|
"logits/rejected": -2.5905513763427734, |
|
"logps/chosen": -396.8397521972656, |
|
"logps/rejected": -377.557861328125, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7912027835845947, |
|
"rewards/margins": 0.7971407771110535, |
|
"rewards/rejected": -1.588343858718872, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 15.362047445414715, |
|
"learning_rate": 4.969679867292276e-08, |
|
"logits/chosen": -2.6830527782440186, |
|
"logits/rejected": -2.6041407585144043, |
|
"logps/chosen": -436.54473876953125, |
|
"logps/rejected": -448.3851623535156, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9174336194992065, |
|
"rewards/margins": 0.7332735061645508, |
|
"rewards/rejected": -1.6507068872451782, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8215593929879644, |
|
"grad_norm": 17.576121150498565, |
|
"learning_rate": 4.6996449722693315e-08, |
|
"logits/chosen": -2.6931352615356445, |
|
"logits/rejected": -2.6350607872009277, |
|
"logps/chosen": -315.1622619628906, |
|
"logps/rejected": -392.06292724609375, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8371334075927734, |
|
"rewards/margins": 0.6744937896728516, |
|
"rewards/rejected": -1.5116270780563354, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 19.810352695452664, |
|
"learning_rate": 4.436390230919465e-08, |
|
"logits/chosen": -2.766540050506592, |
|
"logits/rejected": -2.647644519805908, |
|
"logps/chosen": -376.6566467285156, |
|
"logps/rejected": -402.571533203125, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9764121770858765, |
|
"rewards/margins": 0.7432926297187805, |
|
"rewards/rejected": -1.7197048664093018, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 23.05429981365939, |
|
"learning_rate": 4.180003568187776e-08, |
|
"logits/chosen": -2.5337836742401123, |
|
"logits/rejected": -2.474341869354248, |
|
"logps/chosen": -308.1025390625, |
|
"logps/rejected": -385.6150817871094, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7418735027313232, |
|
"rewards/margins": 1.1077954769134521, |
|
"rewards/rejected": -1.849669098854065, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 16.3861829287344, |
|
"learning_rate": 3.930570615136919e-08, |
|
"logits/chosen": -2.568213939666748, |
|
"logits/rejected": -2.600006103515625, |
|
"logps/chosen": -364.5643615722656, |
|
"logps/rejected": -459.32794189453125, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1484493017196655, |
|
"rewards/margins": 0.7362042665481567, |
|
"rewards/rejected": -1.8846536874771118, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 19.191362805490126, |
|
"learning_rate": 3.6881746803469756e-08, |
|
"logits/chosen": -2.7753946781158447, |
|
"logits/rejected": -2.6937546730041504, |
|
"logps/chosen": -451.4925842285156, |
|
"logps/rejected": -490.1671447753906, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8986201286315918, |
|
"rewards/margins": 0.839145839214325, |
|
"rewards/rejected": -1.737765908241272, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 13.924933587722114, |
|
"learning_rate": 3.452896722091128e-08, |
|
"logits/chosen": -2.6632466316223145, |
|
"logits/rejected": -2.57369327545166, |
|
"logps/chosen": -392.18756103515625, |
|
"logps/rejected": -403.74053955078125, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1617246866226196, |
|
"rewards/margins": 0.6037346124649048, |
|
"rewards/rejected": -1.7654592990875244, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8529565672422815, |
|
"grad_norm": 20.923127069029967, |
|
"learning_rate": 3.2248153212961677e-08, |
|
"logits/chosen": -2.776475429534912, |
|
"logits/rejected": -2.7884275913238525, |
|
"logps/chosen": -319.9564514160156, |
|
"logps/rejected": -403.90985107421875, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.85566246509552, |
|
"rewards/margins": 0.800037682056427, |
|
"rewards/rejected": -1.6557003259658813, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 17.71228621644989, |
|
"learning_rate": 3.004006655297209e-08, |
|
"logits/chosen": -2.704780340194702, |
|
"logits/rejected": -2.690717935562134, |
|
"logps/chosen": -387.93804931640625, |
|
"logps/rejected": -438.994873046875, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8455182909965515, |
|
"rewards/margins": 0.7660375833511353, |
|
"rewards/rejected": -1.611555814743042, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 17.25547789651305, |
|
"learning_rate": 2.7905444723949762e-08, |
|
"logits/chosen": -2.635859251022339, |
|
"logits/rejected": -2.592531681060791, |
|
"logps/chosen": -379.08502197265625, |
|
"logps/rejected": -430.8294372558594, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0563156604766846, |
|
"rewards/margins": 0.5091310739517212, |
|
"rewards/rejected": -1.5654467344284058, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 17.75386295782151, |
|
"learning_rate": 2.5845000672245572e-08, |
|
"logits/chosen": -2.652233839035034, |
|
"logits/rejected": -2.6020119190216064, |
|
"logps/chosen": -291.4010314941406, |
|
"logps/rejected": -421.83251953125, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8415043950080872, |
|
"rewards/margins": 1.0191437005996704, |
|
"rewards/rejected": -1.8606479167938232, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8738880167451596, |
|
"grad_norm": 20.26581048622113, |
|
"learning_rate": 2.385942256943499e-08, |
|
"logits/chosen": -2.7823240756988525, |
|
"logits/rejected": -2.7179102897644043, |
|
"logps/chosen": -368.48687744140625, |
|
"logps/rejected": -421.83831787109375, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1159892082214355, |
|
"rewards/margins": 0.6273930668830872, |
|
"rewards/rejected": -1.743382215499878, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 17.254193295992085, |
|
"learning_rate": 2.194937358247506e-08, |
|
"logits/chosen": -2.6762166023254395, |
|
"logits/rejected": -2.680424213409424, |
|
"logps/chosen": -341.16790771484375, |
|
"logps/rejected": -436.9229431152344, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9166282415390015, |
|
"rewards/margins": 0.8245986104011536, |
|
"rewards/rejected": -1.7412269115447998, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 21.18803904087079, |
|
"learning_rate": 2.011549165221127e-08, |
|
"logits/chosen": -2.5892271995544434, |
|
"logits/rejected": -2.5530495643615723, |
|
"logps/chosen": -343.75274658203125, |
|
"logps/rejected": -402.83343505859375, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9632428288459778, |
|
"rewards/margins": 0.8480204343795776, |
|
"rewards/rejected": -1.8112634420394897, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 18.038523734567626, |
|
"learning_rate": 1.8358389280311303e-08, |
|
"logits/chosen": -2.706275463104248, |
|
"logits/rejected": -2.649019718170166, |
|
"logps/chosen": -383.904296875, |
|
"logps/rejected": -418.57098388671875, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1623432636260986, |
|
"rewards/margins": 0.5158635377883911, |
|
"rewards/rejected": -1.6782068014144897, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 19.626755939860992, |
|
"learning_rate": 1.6678653324693787e-08, |
|
"logits/chosen": -2.744741916656494, |
|
"logits/rejected": -2.661057710647583, |
|
"logps/chosen": -415.6158142089844, |
|
"logps/rejected": -489.20001220703125, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9210155606269836, |
|
"rewards/margins": 0.8741633296012878, |
|
"rewards/rejected": -1.795178771018982, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 19.57980158791374, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": -2.717519998550415, |
|
"logits/rejected": -2.6023640632629395, |
|
"logps/chosen": -398.86126708984375, |
|
"logps/rejected": -399.9064636230469, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1560735702514648, |
|
"rewards/margins": 0.4242979884147644, |
|
"rewards/rejected": -1.580371618270874, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9052851909994767, |
|
"grad_norm": 16.818277672769657, |
|
"learning_rate": 1.3553498707832761e-08, |
|
"logits/chosen": -2.6509828567504883, |
|
"logits/rejected": -2.614760637283325, |
|
"logps/chosen": -306.6497802734375, |
|
"logps/rejected": -347.5115051269531, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8303035497665405, |
|
"rewards/margins": 0.8390544652938843, |
|
"rewards/rejected": -1.6693580150604248, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 17.711495458783695, |
|
"learning_rate": 1.2109123822844653e-08, |
|
"logits/chosen": -2.7017340660095215, |
|
"logits/rejected": -2.539865732192993, |
|
"logps/chosen": -346.7171936035156, |
|
"logps/rejected": -381.2705383300781, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.024086356163025, |
|
"rewards/margins": 0.7089843153953552, |
|
"rewards/rejected": -1.7330706119537354, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 16.25718099914919, |
|
"learning_rate": 1.0744202558037014e-08, |
|
"logits/chosen": -2.7817070484161377, |
|
"logits/rejected": -2.7584192752838135, |
|
"logps/chosen": -422.7364196777344, |
|
"logps/rejected": -449.0599060058594, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0160216093063354, |
|
"rewards/margins": 0.6888941526412964, |
|
"rewards/rejected": -1.7049156427383423, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 19.79616779504436, |
|
"learning_rate": 9.459190786024696e-09, |
|
"logits/chosen": -2.729013442993164, |
|
"logits/rejected": -2.692884683609009, |
|
"logps/chosen": -314.32562255859375, |
|
"logps/rejected": -348.45233154296875, |
|
"loss": 0.4649, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9323883056640625, |
|
"rewards/margins": 0.5154863595962524, |
|
"rewards/rejected": -1.4478747844696045, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 15.466044673164172, |
|
"learning_rate": 8.254517690300944e-09, |
|
"logits/chosen": -2.5260634422302246, |
|
"logits/rejected": -2.4602036476135254, |
|
"logps/chosen": -379.27996826171875, |
|
"logps/rejected": -432.5758361816406, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.931941032409668, |
|
"rewards/margins": 0.9537526965141296, |
|
"rewards/rejected": -1.8856935501098633, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 14.105513003072497, |
|
"learning_rate": 7.130585621893809e-09, |
|
"logits/chosen": -2.612941026687622, |
|
"logits/rejected": -2.601433277130127, |
|
"logps/chosen": -337.0567932128906, |
|
"logps/rejected": -356.3701477050781, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0178024768829346, |
|
"rewards/margins": 0.5061073303222656, |
|
"rewards/rejected": -1.5239098072052002, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9366823652537938, |
|
"grad_norm": 17.67412326556611, |
|
"learning_rate": 6.0877699649840574e-09, |
|
"logits/chosen": -2.676441192626953, |
|
"logits/rejected": -2.6985154151916504, |
|
"logps/chosen": -410.93975830078125, |
|
"logps/rejected": -474.650390625, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8644577860832214, |
|
"rewards/margins": 0.6866751313209534, |
|
"rewards/rejected": -1.5511329174041748, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 21.08771075280092, |
|
"learning_rate": 5.126419011529992e-09, |
|
"logits/chosen": -2.514744997024536, |
|
"logits/rejected": -2.462218999862671, |
|
"logps/chosen": -340.0183410644531, |
|
"logps/rejected": -472.80145263671875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1128913164138794, |
|
"rewards/margins": 1.0394203662872314, |
|
"rewards/rejected": -2.1523118019104004, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9471480900052328, |
|
"grad_norm": 20.973025190997458, |
|
"learning_rate": 4.246853844940723e-09, |
|
"logits/chosen": -2.711920738220215, |
|
"logits/rejected": -2.6448938846588135, |
|
"logps/chosen": -358.5855407714844, |
|
"logps/rejected": -377.30828857421875, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0431839227676392, |
|
"rewards/margins": 0.5513032078742981, |
|
"rewards/rejected": -1.5944870710372925, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 19.679618715790966, |
|
"learning_rate": 3.449368232836869e-09, |
|
"logits/chosen": -2.5656230449676514, |
|
"logits/rejected": -2.5163912773132324, |
|
"logps/chosen": -295.70684814453125, |
|
"logps/rejected": -333.41778564453125, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8301293253898621, |
|
"rewards/margins": 0.7227026224136353, |
|
"rewards/rejected": -1.5528318881988525, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 23.243692564813756, |
|
"learning_rate": 2.734228528934679e-09, |
|
"logits/chosen": -2.750614881515503, |
|
"logits/rejected": -2.7145471572875977, |
|
"logps/chosen": -371.66656494140625, |
|
"logps/rejected": -392.2098693847656, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9127969741821289, |
|
"rewards/margins": 0.6625674366950989, |
|
"rewards/rejected": -1.575364351272583, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 19.814979577148716, |
|
"learning_rate": 2.1016735840859447e-09, |
|
"logits/chosen": -2.708528995513916, |
|
"logits/rejected": -2.5766711235046387, |
|
"logps/chosen": -431.52508544921875, |
|
"logps/rejected": -454.4827575683594, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0717358589172363, |
|
"rewards/margins": 0.8303524255752563, |
|
"rewards/rejected": -1.9020881652832031, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.968079539508111, |
|
"grad_norm": 17.66585714924501, |
|
"learning_rate": 1.551914666503812e-09, |
|
"logits/chosen": -2.656337261199951, |
|
"logits/rejected": -2.6318295001983643, |
|
"logps/chosen": -453.3070373535156, |
|
"logps/rejected": -438.5719299316406, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8247343897819519, |
|
"rewards/margins": 0.6178687810897827, |
|
"rewards/rejected": -1.4426031112670898, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 19.12046100454679, |
|
"learning_rate": 1.0851353912008642e-09, |
|
"logits/chosen": -2.67498779296875, |
|
"logits/rejected": -2.5374255180358887, |
|
"logps/chosen": -420.9612731933594, |
|
"logps/rejected": -403.1297302246094, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0306751728057861, |
|
"rewards/margins": 0.6978103518486023, |
|
"rewards/rejected": -1.7284857034683228, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.97854526425955, |
|
"grad_norm": 23.698665321678508, |
|
"learning_rate": 7.014916586632336e-10, |
|
"logits/chosen": -2.6748759746551514, |
|
"logits/rejected": -2.589711904525757, |
|
"logps/chosen": -330.1553649902344, |
|
"logps/rejected": -370.8642578125, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8707270622253418, |
|
"rewards/margins": 0.5380834341049194, |
|
"rewards/rejected": -1.4088103771209717, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 14.404582481778125, |
|
"learning_rate": 4.011116027811956e-10, |
|
"logits/chosen": -2.7190022468566895, |
|
"logits/rejected": -2.7610325813293457, |
|
"logps/chosen": -337.45513916015625, |
|
"logps/rejected": -476.40069580078125, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.878827691078186, |
|
"rewards/margins": 0.7724133133888245, |
|
"rewards/rejected": -1.6512409448623657, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 20.80788365647563, |
|
"learning_rate": 1.840955480532924e-10, |
|
"logits/chosen": -2.709712505340576, |
|
"logits/rejected": -2.666142225265503, |
|
"logps/chosen": -482.58636474609375, |
|
"logps/rejected": -484.43060302734375, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9788215756416321, |
|
"rewards/margins": 0.6121380925178528, |
|
"rewards/rejected": -1.5909597873687744, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 19.04109274522655, |
|
"learning_rate": 5.051597607894087e-11, |
|
"logits/chosen": -2.6989779472351074, |
|
"logits/rejected": -2.5977814197540283, |
|
"logps/chosen": -321.80120849609375, |
|
"logps/rejected": -401.3192138671875, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0180636644363403, |
|
"rewards/margins": 0.8546449542045593, |
|
"rewards/rejected": -1.8727085590362549, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"grad_norm": 21.903615655450324, |
|
"learning_rate": 4.1750135001961117e-13, |
|
"logits/chosen": -2.6709775924682617, |
|
"logits/rejected": -2.6583666801452637, |
|
"logps/chosen": -406.5054626464844, |
|
"logps/rejected": -506.41973876953125, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8070088624954224, |
|
"rewards/margins": 1.0546354055404663, |
|
"rewards/rejected": -1.8616443872451782, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1911, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5618298566509827, |
|
"train_runtime": 11326.5504, |
|
"train_samples_per_second": 5.397, |
|
"train_steps_per_second": 0.169 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1911, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|