{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2000, "global_step": 1911, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005232862375719519, "grad_norm": 8.940283023609332, "learning_rate": 2.6041666666666664e-09, "logits/chosen": -3.4411821365356445, "logits/rejected": -3.41083025932312, "logps/chosen": -501.4610595703125, "logps/rejected": -596.95849609375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0052328623757195184, "grad_norm": 8.689551063328278, "learning_rate": 2.6041666666666667e-08, "logits/chosen": -3.0671932697296143, "logits/rejected": -3.0745370388031006, "logps/chosen": -335.75750732421875, "logps/rejected": -280.19635009765625, "loss": 0.6931, "rewards/accuracies": 0.4166666567325592, "rewards/chosen": -0.000958989083301276, "rewards/margins": -0.0004560473607853055, "rewards/rejected": -0.0005029416061006486, "step": 10 }, { "epoch": 0.010465724751439037, "grad_norm": 8.212994986770337, "learning_rate": 5.208333333333333e-08, "logits/chosen": -3.061262845993042, "logits/rejected": -3.061492443084717, "logps/chosen": -226.217529296875, "logps/rejected": -215.25961303710938, "loss": 0.6931, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.00020114154904149473, "rewards/margins": -0.0007163770496845245, "rewards/rejected": 0.0005152354133315384, "step": 20 }, { "epoch": 0.015698587127158554, "grad_norm": 8.003664804799678, "learning_rate": 7.812499999999999e-08, "logits/chosen": -2.958186388015747, "logits/rejected": -2.946017026901245, "logps/chosen": -300.97979736328125, "logps/rejected": -276.68634033203125, "loss": 0.6932, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0008273817365989089, "rewards/margins": 0.001302235876210034, "rewards/rejected": -0.00047485390678048134, "step": 30 }, { "epoch": 0.020931449502878074, "grad_norm": 8.325071471418028, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -3.1442675590515137, "logits/rejected": -3.0619583129882812, "logps/chosen": -316.81396484375, "logps/rejected": -308.1053771972656, "loss": 0.6929, "rewards/accuracies": 0.375, "rewards/chosen": -0.0002774396270979196, "rewards/margins": -0.0006835688254795969, "rewards/rejected": 0.000406129052862525, "step": 40 }, { "epoch": 0.026164311878597593, "grad_norm": 8.866152231216553, "learning_rate": 1.3020833333333334e-07, "logits/chosen": -3.1651201248168945, "logits/rejected": -3.0696120262145996, "logps/chosen": -297.9878845214844, "logps/rejected": -271.53094482421875, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": -0.00013881332415621728, "rewards/margins": -0.0007885316153988242, "rewards/rejected": 0.0006497182184830308, "step": 50 }, { "epoch": 0.03139717425431711, "grad_norm": 8.702138127987917, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -3.0590338706970215, "logits/rejected": -3.033952236175537, "logps/chosen": -271.07489013671875, "logps/rejected": -266.8861389160156, "loss": 0.6927, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0012097412254661322, "rewards/margins": -0.0009210550342686474, "rewards/rejected": -0.00028868610388599336, "step": 60 }, { "epoch": 0.03663003663003663, "grad_norm": 9.17847945114856, "learning_rate": 1.8229166666666666e-07, "logits/chosen": -3.159991979598999, "logits/rejected": -3.108083963394165, "logps/chosen": -331.7613830566406, "logps/rejected": -266.53192138671875, "loss": 0.6927, "rewards/accuracies": 0.75, "rewards/chosen": 0.0022141693625599146, "rewards/margins": 0.004044364206492901, "rewards/rejected": -0.0018301953095942736, "step": 70 }, { "epoch": 0.04186289900575615, "grad_norm": 8.70085934375087, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -3.105524778366089, "logits/rejected": -3.0857417583465576, "logps/chosen": -309.269287109375, "logps/rejected": -304.593505859375, "loss": 0.6923, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0005593494279310107, "rewards/margins": 0.0006081314058974385, "rewards/rejected": -0.0011674808338284492, "step": 80 }, { "epoch": 0.04709576138147567, "grad_norm": 7.70370023214065, "learning_rate": 2.3437499999999998e-07, "logits/chosen": -3.1123859882354736, "logits/rejected": -3.0873000621795654, "logps/chosen": -244.9102783203125, "logps/rejected": -213.2863311767578, "loss": 0.6921, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.003876964095979929, "rewards/margins": 0.003002858255058527, "rewards/rejected": 0.0008741060155443847, "step": 90 }, { "epoch": 0.052328623757195186, "grad_norm": 7.584430634088357, "learning_rate": 2.604166666666667e-07, "logits/chosen": -3.119906425476074, "logits/rejected": -3.0290744304656982, "logps/chosen": -222.6891326904297, "logps/rejected": -199.02757263183594, "loss": 0.6913, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0010819355957210064, "rewards/margins": 0.0032298602163791656, "rewards/rejected": -0.0021479243878275156, "step": 100 }, { "epoch": 0.0575614861329147, "grad_norm": 8.205907431125933, "learning_rate": 2.864583333333333e-07, "logits/chosen": -3.0188896656036377, "logits/rejected": -2.9393975734710693, "logps/chosen": -261.2885437011719, "logps/rejected": -197.14413452148438, "loss": 0.69, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.00237687723711133, "rewards/margins": 0.006252645049244165, "rewards/rejected": -0.003875765949487686, "step": 110 }, { "epoch": 0.06279434850863422, "grad_norm": 8.123874667175091, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -3.150888681411743, "logits/rejected": -3.160604953765869, "logps/chosen": -369.46343994140625, "logps/rejected": -341.2452392578125, "loss": 0.69, "rewards/accuracies": 0.5, "rewards/chosen": 0.002890329109504819, "rewards/margins": 0.0010580271482467651, "rewards/rejected": 0.0018323017284274101, "step": 120 }, { "epoch": 0.06802721088435375, "grad_norm": 7.876000482657188, "learning_rate": 3.3854166666666667e-07, "logits/chosen": -3.0063118934631348, "logits/rejected": -3.01861834526062, "logps/chosen": -227.5029296875, "logps/rejected": -238.10140991210938, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.004924282897263765, "rewards/margins": 0.012934369035065174, "rewards/rejected": -0.008010086603462696, "step": 130 }, { "epoch": 0.07326007326007326, "grad_norm": 8.190955614521185, "learning_rate": 3.645833333333333e-07, "logits/chosen": -2.970432996749878, "logits/rejected": -2.9969277381896973, "logps/chosen": -298.6478576660156, "logps/rejected": -310.3978271484375, "loss": 0.6858, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.009556067176163197, "rewards/margins": 0.02303231693804264, "rewards/rejected": -0.013476249761879444, "step": 140 }, { "epoch": 0.07849293563579278, "grad_norm": 9.052012593627612, "learning_rate": 3.9062499999999997e-07, "logits/chosen": -3.103742837905884, "logits/rejected": -3.0245158672332764, "logps/chosen": -263.4891662597656, "logps/rejected": -248.58151245117188, "loss": 0.6844, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.002595582278445363, "rewards/margins": 0.0049351779744029045, "rewards/rejected": -0.007530760020017624, "step": 150 }, { "epoch": 0.0837257980115123, "grad_norm": 8.432630883076405, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -3.114664077758789, "logits/rejected": -3.012159824371338, "logps/chosen": -262.4814453125, "logps/rejected": -236.3439178466797, "loss": 0.6837, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.009164141491055489, "rewards/margins": 0.02413203939795494, "rewards/rejected": -0.014967897906899452, "step": 160 }, { "epoch": 0.08895866038723181, "grad_norm": 9.124712051546432, "learning_rate": 4.427083333333333e-07, "logits/chosen": -3.08809494972229, "logits/rejected": -3.0950043201446533, "logps/chosen": -232.0874481201172, "logps/rejected": -265.0681457519531, "loss": 0.6811, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01028988417237997, "rewards/margins": 0.015653502196073532, "rewards/rejected": -0.005363619886338711, "step": 170 }, { "epoch": 0.09419152276295134, "grad_norm": 7.433236173747129, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -3.065229892730713, "logits/rejected": -2.972902774810791, "logps/chosen": -295.44805908203125, "logps/rejected": -269.96026611328125, "loss": 0.6817, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.028342243283987045, "rewards/margins": 0.017250288277864456, "rewards/rejected": -0.0455925352871418, "step": 180 }, { "epoch": 0.09942438513867086, "grad_norm": 9.490780191701429, "learning_rate": 4.947916666666667e-07, "logits/chosen": -3.0910661220550537, "logits/rejected": -3.00136137008667, "logps/chosen": -304.7739562988281, "logps/rejected": -260.3194274902344, "loss": 0.6741, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0017203543102368712, "rewards/margins": 0.03843419998884201, "rewards/rejected": -0.036713846027851105, "step": 190 }, { "epoch": 0.10465724751439037, "grad_norm": 8.139099488973592, "learning_rate": 4.999732803821339e-07, "logits/chosen": -2.9885010719299316, "logits/rejected": -2.911945104598999, "logps/chosen": -274.64801025390625, "logps/rejected": -299.32476806640625, "loss": 0.6711, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.015315435826778412, "rewards/margins": 0.0384557843208313, "rewards/rejected": -0.05377122014760971, "step": 200 }, { "epoch": 0.10989010989010989, "grad_norm": 8.597620117357772, "learning_rate": 4.998647417232375e-07, "logits/chosen": -3.049499750137329, "logits/rejected": -2.9921631813049316, "logps/chosen": -214.9530487060547, "logps/rejected": -198.6853485107422, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.011373547837138176, "rewards/margins": 0.011143224313855171, "rewards/rejected": -0.022516775876283646, "step": 210 }, { "epoch": 0.1151229722658294, "grad_norm": 7.814974338489718, "learning_rate": 4.996727502703357e-07, "logits/chosen": -3.0792040824890137, "logits/rejected": -3.0471749305725098, "logps/chosen": -280.1625671386719, "logps/rejected": -250.3578643798828, "loss": 0.6749, "rewards/accuracies": 0.75, "rewards/chosen": 0.013256365433335304, "rewards/margins": 0.07305942475795746, "rewards/rejected": -0.0598030686378479, "step": 220 }, { "epoch": 0.12035583464154893, "grad_norm": 8.021422865019257, "learning_rate": 4.993973701470142e-07, "logits/chosen": -3.0776336193084717, "logits/rejected": -3.065192461013794, "logps/chosen": -246.03701782226562, "logps/rejected": -330.24395751953125, "loss": 0.6617, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.004228830337524414, "rewards/margins": 0.06871353834867477, "rewards/rejected": -0.07294236868619919, "step": 230 }, { "epoch": 0.12558869701726844, "grad_norm": 8.830102325688348, "learning_rate": 4.990386933279972e-07, "logits/chosen": -3.057614326477051, "logits/rejected": -2.985898971557617, "logps/chosen": -237.7249755859375, "logps/rejected": -227.34384155273438, "loss": 0.6631, "rewards/accuracies": 0.625, "rewards/chosen": -0.05727599188685417, "rewards/margins": 0.04271426051855087, "rewards/rejected": -0.09999025613069534, "step": 240 }, { "epoch": 0.13082155939298795, "grad_norm": 9.25379873829916, "learning_rate": 4.985968396084284e-07, "logits/chosen": -2.9885993003845215, "logits/rejected": -2.986743688583374, "logps/chosen": -295.55853271484375, "logps/rejected": -264.4269104003906, "loss": 0.6579, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.047038041055202484, "rewards/margins": 0.10233273357152939, "rewards/rejected": -0.14937077462673187, "step": 250 }, { "epoch": 0.1360544217687075, "grad_norm": 10.58421450887609, "learning_rate": 4.98071956563861e-07, "logits/chosen": -3.092935085296631, "logits/rejected": -3.026994466781616, "logps/chosen": -293.06158447265625, "logps/rejected": -282.7720642089844, "loss": 0.6588, "rewards/accuracies": 0.75, "rewards/chosen": -0.01732378825545311, "rewards/margins": 0.11920974403619766, "rewards/rejected": -0.13653352856636047, "step": 260 }, { "epoch": 0.141287284144427, "grad_norm": 7.811018005032728, "learning_rate": 4.97464219500968e-07, "logits/chosen": -3.0531859397888184, "logits/rejected": -2.9968655109405518, "logps/chosen": -283.08465576171875, "logps/rejected": -289.63763427734375, "loss": 0.6429, "rewards/accuracies": 0.625, "rewards/chosen": -0.10270702838897705, "rewards/margins": 0.05593450739979744, "rewards/rejected": -0.1586415320634842, "step": 270 }, { "epoch": 0.14652014652014653, "grad_norm": 9.605514735660224, "learning_rate": 4.967738313989918e-07, "logits/chosen": -3.0212960243225098, "logits/rejected": -3.0366883277893066, "logps/chosen": -314.5849914550781, "logps/rejected": -308.0143127441406, "loss": 0.6574, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0773221030831337, "rewards/margins": 0.13322624564170837, "rewards/rejected": -0.21054835617542267, "step": 280 }, { "epoch": 0.15175300889586604, "grad_norm": 8.389574929032044, "learning_rate": 4.960010228419499e-07, "logits/chosen": -3.1096034049987793, "logits/rejected": -3.0065712928771973, "logps/chosen": -336.0714416503906, "logps/rejected": -258.11334228515625, "loss": 0.6613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12762203812599182, "rewards/margins": 0.07918674498796463, "rewards/rejected": -0.20680880546569824, "step": 290 }, { "epoch": 0.15698587127158556, "grad_norm": 8.7670335055176, "learning_rate": 4.951460519416227e-07, "logits/chosen": -3.085927963256836, "logits/rejected": -3.0476162433624268, "logps/chosen": -332.5134582519531, "logps/rejected": -279.91424560546875, "loss": 0.6507, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18624143302440643, "rewards/margins": 0.0757715106010437, "rewards/rejected": -0.26201292872428894, "step": 300 }, { "epoch": 0.16221873364730507, "grad_norm": 8.663994541963719, "learning_rate": 4.942092042513458e-07, "logits/chosen": -3.14369535446167, "logits/rejected": -3.0388572216033936, "logps/chosen": -326.9494934082031, "logps/rejected": -318.8394470214844, "loss": 0.6461, "rewards/accuracies": 0.75, "rewards/chosen": -0.05960199981927872, "rewards/margins": 0.17124859988689423, "rewards/rejected": -0.23085062205791473, "step": 310 }, { "epoch": 0.1674515960230246, "grad_norm": 10.214098303939142, "learning_rate": 4.931907926706373e-07, "logits/chosen": -3.1341705322265625, "logits/rejected": -3.0006356239318848, "logps/chosen": -352.4683837890625, "logps/rejected": -254.84872436523438, "loss": 0.6359, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.207993745803833, "rewards/margins": 0.12400402128696442, "rewards/rejected": -0.3319977819919586, "step": 320 }, { "epoch": 0.1726844583987441, "grad_norm": 11.9926488132874, "learning_rate": 4.920911573406924e-07, "logits/chosen": -2.988920211791992, "logits/rejected": -2.8621726036071777, "logps/chosen": -270.1966247558594, "logps/rejected": -231.31307983398438, "loss": 0.6416, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17856433987617493, "rewards/margins": 0.1818085014820099, "rewards/rejected": -0.3603728413581848, "step": 330 }, { "epoch": 0.17791732077446362, "grad_norm": 11.181487800996836, "learning_rate": 4.909106655307787e-07, "logits/chosen": -3.071873188018799, "logits/rejected": -3.073513984680176, "logps/chosen": -300.5464172363281, "logps/rejected": -336.34735107421875, "loss": 0.6082, "rewards/accuracies": 0.75, "rewards/chosen": -0.2116507738828659, "rewards/margins": 0.15836475789546967, "rewards/rejected": -0.37001553177833557, "step": 340 }, { "epoch": 0.18315018315018314, "grad_norm": 11.847042665159117, "learning_rate": 4.896497115155709e-07, "logits/chosen": -2.988060235977173, "logits/rejected": -3.0429558753967285, "logps/chosen": -211.3376007080078, "logps/rejected": -275.08087158203125, "loss": 0.6017, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2585861086845398, "rewards/margins": 0.3116615116596222, "rewards/rejected": -0.5702476501464844, "step": 350 }, { "epoch": 0.18838304552590268, "grad_norm": 10.78945471485412, "learning_rate": 4.883087164434672e-07, "logits/chosen": -3.053856372833252, "logits/rejected": -2.9947004318237305, "logps/chosen": -300.0679931640625, "logps/rejected": -320.3852233886719, "loss": 0.6157, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35630059242248535, "rewards/margins": 0.1511966437101364, "rewards/rejected": -0.5074971914291382, "step": 360 }, { "epoch": 0.1936159079016222, "grad_norm": 12.222844062228763, "learning_rate": 4.868881281959282e-07, "logits/chosen": -3.011864423751831, "logits/rejected": -2.965883255004883, "logps/chosen": -337.5576171875, "logps/rejected": -347.80572509765625, "loss": 0.6028, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4423811435699463, "rewards/margins": 0.4099253714084625, "rewards/rejected": -0.8523064851760864, "step": 370 }, { "epoch": 0.1988487702773417, "grad_norm": 13.768587447328201, "learning_rate": 4.853884212378889e-07, "logits/chosen": -2.873631715774536, "logits/rejected": -2.939685344696045, "logps/chosen": -235.6674346923828, "logps/rejected": -387.4900817871094, "loss": 0.5963, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33242642879486084, "rewards/margins": 0.29951974749565125, "rewards/rejected": -0.6319462060928345, "step": 380 }, { "epoch": 0.20408163265306123, "grad_norm": 18.448504645275186, "learning_rate": 4.838100964592904e-07, "logits/chosen": -3.0143513679504395, "logits/rejected": -2.915879726409912, "logps/chosen": -423.55926513671875, "logps/rejected": -330.427490234375, "loss": 0.6128, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.36892321705818176, "rewards/margins": 0.2754189372062683, "rewards/rejected": -0.6443422436714172, "step": 390 }, { "epoch": 0.20931449502878074, "grad_norm": 13.04561395514187, "learning_rate": 4.821536810077878e-07, "logits/chosen": -3.05737042427063, "logits/rejected": -2.9584059715270996, "logps/chosen": -340.3260192871094, "logps/rejected": -336.16961669921875, "loss": 0.5796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5146622061729431, "rewards/margins": 0.4159639775753021, "rewards/rejected": -0.9306262731552124, "step": 400 }, { "epoch": 0.21454735740450026, "grad_norm": 12.961227614879007, "learning_rate": 4.804197281126862e-07, "logits/chosen": -2.953254222869873, "logits/rejected": -2.9386799335479736, "logps/chosen": -332.95013427734375, "logps/rejected": -379.0150146484375, "loss": 0.6141, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5353738069534302, "rewards/margins": 0.16674116253852844, "rewards/rejected": -0.7021149396896362, "step": 410 }, { "epoch": 0.21978021978021978, "grad_norm": 13.525221402164235, "learning_rate": 4.786088169001671e-07, "logits/chosen": -3.0310072898864746, "logits/rejected": -3.0133447647094727, "logps/chosen": -328.05743408203125, "logps/rejected": -359.2528076171875, "loss": 0.5931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5046383738517761, "rewards/margins": 0.18837007880210876, "rewards/rejected": -0.6930084824562073, "step": 420 }, { "epoch": 0.2250130821559393, "grad_norm": 15.343151149244173, "learning_rate": 4.767215521998648e-07, "logits/chosen": -3.097900867462158, "logits/rejected": -2.9799551963806152, "logps/chosen": -344.5359802246094, "logps/rejected": -340.30279541015625, "loss": 0.5977, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3433271050453186, "rewards/margins": 0.6302416324615479, "rewards/rejected": -0.9735687971115112, "step": 430 }, { "epoch": 0.2302459445316588, "grad_norm": 15.463582386840484, "learning_rate": 4.7475856434285853e-07, "logits/chosen": -3.0013861656188965, "logits/rejected": -2.9684953689575195, "logps/chosen": -315.1118469238281, "logps/rejected": -319.0517578125, "loss": 0.5884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44304031133651733, "rewards/margins": 0.31362444162368774, "rewards/rejected": -0.7566647529602051, "step": 440 }, { "epoch": 0.23547880690737832, "grad_norm": 18.270478359234886, "learning_rate": 4.727205089511466e-07, "logits/chosen": -2.8321094512939453, "logits/rejected": -2.848545551300049, "logps/chosen": -289.8956604003906, "logps/rejected": -334.09063720703125, "loss": 0.5472, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5760111808776855, "rewards/margins": 0.3905490040779114, "rewards/rejected": -0.9665601849555969, "step": 450 }, { "epoch": 0.24071166928309787, "grad_norm": 18.394272193166348, "learning_rate": 4.706080667186738e-07, "logits/chosen": -2.9533510208129883, "logits/rejected": -2.85048246383667, "logps/chosen": -339.3458557128906, "logps/rejected": -362.229736328125, "loss": 0.5852, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.527037501335144, "rewards/margins": 0.513110876083374, "rewards/rejected": -1.040148377418518, "step": 460 }, { "epoch": 0.24594453165881738, "grad_norm": 14.635877926172551, "learning_rate": 4.68421943183986e-07, "logits/chosen": -2.9311420917510986, "logits/rejected": -2.9059174060821533, "logps/chosen": -353.56158447265625, "logps/rejected": -397.67315673828125, "loss": 0.5503, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6963423490524292, "rewards/margins": 0.4823771119117737, "rewards/rejected": -1.178719401359558, "step": 470 }, { "epoch": 0.25117739403453687, "grad_norm": 15.746195707332934, "learning_rate": 4.661628684945851e-07, "logits/chosen": -2.9695117473602295, "logits/rejected": -2.9841675758361816, "logps/chosen": -291.04376220703125, "logps/rejected": -328.80816650390625, "loss": 0.5728, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5066032409667969, "rewards/margins": 0.49630409479141235, "rewards/rejected": -1.002907156944275, "step": 480 }, { "epoch": 0.2564102564102564, "grad_norm": 13.427286500899548, "learning_rate": 4.638315971630662e-07, "logits/chosen": -2.9672865867614746, "logits/rejected": -2.9439778327941895, "logps/chosen": -344.151611328125, "logps/rejected": -350.19305419921875, "loss": 0.5485, "rewards/accuracies": 0.625, "rewards/chosen": -0.540307343006134, "rewards/margins": 0.437363862991333, "rewards/rejected": -0.9776712656021118, "step": 490 }, { "epoch": 0.2616431187859759, "grad_norm": 17.8293277249549, "learning_rate": 4.6142890781511635e-07, "logits/chosen": -2.928375720977783, "logits/rejected": -2.9310758113861084, "logps/chosen": -301.03839111328125, "logps/rejected": -364.05670166015625, "loss": 0.5414, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.43716445565223694, "rewards/margins": 0.5820390582084656, "rewards/rejected": -1.0192034244537354, "step": 500 }, { "epoch": 0.2668759811616955, "grad_norm": 13.134882476577088, "learning_rate": 4.5895560292945996e-07, "logits/chosen": -3.02789306640625, "logits/rejected": -2.938778877258301, "logps/chosen": -346.013427734375, "logps/rejected": -349.2925109863281, "loss": 0.5768, "rewards/accuracies": 0.75, "rewards/chosen": -0.6063824892044067, "rewards/margins": 0.4094172418117523, "rewards/rejected": -1.015799641609192, "step": 510 }, { "epoch": 0.272108843537415, "grad_norm": 14.111790777809139, "learning_rate": 4.5641250856983743e-07, "logits/chosen": -2.993448495864868, "logits/rejected": -2.9830093383789062, "logps/chosen": -336.80517578125, "logps/rejected": -378.89544677734375, "loss": 0.5603, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.7271286845207214, "rewards/margins": 0.24076862633228302, "rewards/rejected": -0.9678972959518433, "step": 520 }, { "epoch": 0.2773417059131345, "grad_norm": 23.979569462254332, "learning_rate": 4.5380047410910655e-07, "logits/chosen": -2.937358856201172, "logits/rejected": -2.9407734870910645, "logps/chosen": -383.3623962402344, "logps/rejected": -362.71099853515625, "loss": 0.547, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.45562949776649475, "rewards/margins": 0.7254458665847778, "rewards/rejected": -1.1810753345489502, "step": 530 }, { "epoch": 0.282574568288854, "grad_norm": 19.701709513451974, "learning_rate": 4.5112037194555876e-07, "logits/chosen": -2.910482406616211, "logits/rejected": -2.8749754428863525, "logps/chosen": -393.69268798828125, "logps/rejected": -415.40338134765625, "loss": 0.5233, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9999048113822937, "rewards/margins": 0.4355775713920593, "rewards/rejected": -1.435482382774353, "step": 540 }, { "epoch": 0.28780743066457354, "grad_norm": 16.547979220851758, "learning_rate": 4.4837309721154536e-07, "logits/chosen": -3.030179977416992, "logits/rejected": -2.925313949584961, "logps/chosen": -398.4112243652344, "logps/rejected": -411.71881103515625, "loss": 0.5809, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6699625253677368, "rewards/margins": 0.7033188939094543, "rewards/rejected": -1.3732813596725464, "step": 550 }, { "epoch": 0.29304029304029305, "grad_norm": 17.970228607671963, "learning_rate": 4.4555956747451065e-07, "logits/chosen": -3.0206010341644287, "logits/rejected": -2.9801762104034424, "logps/chosen": -325.3530578613281, "logps/rejected": -367.80718994140625, "loss": 0.5519, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3854089379310608, "rewards/margins": 0.6328403353691101, "rewards/rejected": -1.018249273300171, "step": 560 }, { "epoch": 0.29827315541601257, "grad_norm": 14.821894805782252, "learning_rate": 4.426807224305315e-07, "logits/chosen": -3.058014392852783, "logits/rejected": -2.9584782123565674, "logps/chosen": -334.9781494140625, "logps/rejected": -347.7608337402344, "loss": 0.5869, "rewards/accuracies": 0.75, "rewards/chosen": -0.5377572774887085, "rewards/margins": 0.4088308811187744, "rewards/rejected": -0.9465881586074829, "step": 570 }, { "epoch": 0.3035060177917321, "grad_norm": 15.304057814913962, "learning_rate": 4.397375235904669e-07, "logits/chosen": -3.0159687995910645, "logits/rejected": -2.940138339996338, "logps/chosen": -378.4619445800781, "logps/rejected": -325.83795166015625, "loss": 0.5461, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6433390378952026, "rewards/margins": 0.4729389250278473, "rewards/rejected": -1.1162779331207275, "step": 580 }, { "epoch": 0.3087388801674516, "grad_norm": 15.41023592760314, "learning_rate": 4.3673095395882074e-07, "logits/chosen": -2.826242446899414, "logits/rejected": -2.8306374549865723, "logps/chosen": -297.02642822265625, "logps/rejected": -344.32562255859375, "loss": 0.5647, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8071243166923523, "rewards/margins": 0.44610705971717834, "rewards/rejected": -1.2532315254211426, "step": 590 }, { "epoch": 0.3139717425431711, "grad_norm": 19.465252228371234, "learning_rate": 4.3366201770542687e-07, "logits/chosen": -2.9092609882354736, "logits/rejected": -2.910013198852539, "logps/chosen": -344.72369384765625, "logps/rejected": -383.1976623535156, "loss": 0.5827, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6237624883651733, "rewards/margins": 0.513608455657959, "rewards/rejected": -1.1373710632324219, "step": 600 }, { "epoch": 0.31920460491889063, "grad_norm": 20.05320596442739, "learning_rate": 4.3053173983006395e-07, "logits/chosen": -2.9518351554870605, "logits/rejected": -2.872385263442993, "logps/chosen": -259.81561279296875, "logps/rejected": -335.95587158203125, "loss": 0.5515, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5689873695373535, "rewards/margins": 0.6198877096176147, "rewards/rejected": -1.1888750791549683, "step": 610 }, { "epoch": 0.32443746729461015, "grad_norm": 15.709917499515434, "learning_rate": 4.2734116582011403e-07, "logits/chosen": -2.9943740367889404, "logits/rejected": -2.838672161102295, "logps/chosen": -406.30401611328125, "logps/rejected": -319.8934020996094, "loss": 0.5606, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4886740744113922, "rewards/margins": 0.6085253953933716, "rewards/rejected": -1.097199559211731, "step": 620 }, { "epoch": 0.32967032967032966, "grad_norm": 12.984865579984941, "learning_rate": 4.2409136130137845e-07, "logits/chosen": -2.9008967876434326, "logits/rejected": -2.91571044921875, "logps/chosen": -289.12115478515625, "logps/rejected": -377.608154296875, "loss": 0.6095, "rewards/accuracies": 0.625, "rewards/chosen": -0.6557156443595886, "rewards/margins": 0.4830680787563324, "rewards/rejected": -1.1387838125228882, "step": 630 }, { "epoch": 0.3349031920460492, "grad_norm": 19.717030591939775, "learning_rate": 4.207834116821672e-07, "logits/chosen": -2.958247661590576, "logits/rejected": -2.885899066925049, "logps/chosen": -340.4076232910156, "logps/rejected": -417.45916748046875, "loss": 0.5439, "rewards/accuracies": 0.75, "rewards/chosen": -0.5420235991477966, "rewards/margins": 0.7290440797805786, "rewards/rejected": -1.2710676193237305, "step": 640 }, { "epoch": 0.3401360544217687, "grad_norm": 16.729020989503553, "learning_rate": 4.174184217907818e-07, "logits/chosen": -2.9285740852355957, "logits/rejected": -2.892252206802368, "logps/chosen": -332.4837951660156, "logps/rejected": -364.90606689453125, "loss": 0.5831, "rewards/accuracies": 0.75, "rewards/chosen": -0.6785880923271179, "rewards/margins": 0.5523657202720642, "rewards/rejected": -1.2309538125991821, "step": 650 }, { "epoch": 0.3453689167974882, "grad_norm": 20.83462510110715, "learning_rate": 4.1399751550651084e-07, "logits/chosen": -2.904776096343994, "logits/rejected": -2.8706138134002686, "logps/chosen": -321.9764099121094, "logps/rejected": -347.6967468261719, "loss": 0.561, "rewards/accuracies": 0.625, "rewards/chosen": -0.8602074384689331, "rewards/margins": 0.3864290714263916, "rewards/rejected": -1.2466365098953247, "step": 660 }, { "epoch": 0.35060177917320773, "grad_norm": 16.939482005435558, "learning_rate": 4.1052183538426426e-07, "logits/chosen": -2.8574535846710205, "logits/rejected": -2.8395981788635254, "logps/chosen": -335.42327880859375, "logps/rejected": -357.9546813964844, "loss": 0.5691, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7099284529685974, "rewards/margins": 0.5589883923530579, "rewards/rejected": -1.2689168453216553, "step": 670 }, { "epoch": 0.35583464154892724, "grad_norm": 14.529627338961385, "learning_rate": 4.0699254227296884e-07, "logits/chosen": -2.7800791263580322, "logits/rejected": -2.7351772785186768, "logps/chosen": -342.0890808105469, "logps/rejected": -360.97552490234375, "loss": 0.5595, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.009031057357788, "rewards/margins": 0.451460063457489, "rewards/rejected": -1.4604910612106323, "step": 680 }, { "epoch": 0.36106750392464676, "grad_norm": 15.776547924608785, "learning_rate": 4.034108149278543e-07, "logits/chosen": -2.926506996154785, "logits/rejected": -2.839150905609131, "logps/chosen": -330.08648681640625, "logps/rejected": -370.6238708496094, "loss": 0.5372, "rewards/accuracies": 0.75, "rewards/chosen": -0.8743458986282349, "rewards/margins": 0.5746399164199829, "rewards/rejected": -1.4489858150482178, "step": 690 }, { "epoch": 0.3663003663003663, "grad_norm": 15.377113085837133, "learning_rate": 3.9977784961675833e-07, "logits/chosen": -2.8555071353912354, "logits/rejected": -2.8349578380584717, "logps/chosen": -342.14251708984375, "logps/rejected": -366.9553527832031, "loss": 0.5548, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8782415390014648, "rewards/margins": 0.4512537121772766, "rewards/rejected": -1.3294951915740967, "step": 700 }, { "epoch": 0.3715332286760858, "grad_norm": 14.215944756798452, "learning_rate": 3.96094859720583e-07, "logits/chosen": -2.897223472595215, "logits/rejected": -2.7988810539245605, "logps/chosen": -398.7936706542969, "logps/rejected": -397.49578857421875, "loss": 0.5165, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6832660436630249, "rewards/margins": 0.5724307298660278, "rewards/rejected": -1.2556967735290527, "step": 710 }, { "epoch": 0.37676609105180536, "grad_norm": 15.259982422665061, "learning_rate": 3.923630753280357e-07, "logits/chosen": -2.7229888439178467, "logits/rejected": -2.7818284034729004, "logps/chosen": -273.22772216796875, "logps/rejected": -326.69171142578125, "loss": 0.5265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5913980603218079, "rewards/margins": 0.4389529228210449, "rewards/rejected": -1.030350923538208, "step": 720 }, { "epoch": 0.3819989534275249, "grad_norm": 16.540503842805457, "learning_rate": 3.8858374282478893e-07, "logits/chosen": -2.847386360168457, "logits/rejected": -2.822706460952759, "logps/chosen": -340.0333251953125, "logps/rejected": -446.2408752441406, "loss": 0.5689, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7927729487419128, "rewards/margins": 0.775786280632019, "rewards/rejected": -1.5685592889785767, "step": 730 }, { "epoch": 0.3872318158032444, "grad_norm": 14.485113206171722, "learning_rate": 3.8475812447719823e-07, "logits/chosen": -2.7510900497436523, "logits/rejected": -2.770341396331787, "logps/chosen": -311.25360107421875, "logps/rejected": -339.53631591796875, "loss": 0.5326, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7380820512771606, "rewards/margins": 0.47503456473350525, "rewards/rejected": -1.2131164073944092, "step": 740 }, { "epoch": 0.3924646781789639, "grad_norm": 20.826039047439295, "learning_rate": 3.8088749801071496e-07, "logits/chosen": -2.784240245819092, "logits/rejected": -2.7459471225738525, "logps/chosen": -353.0906677246094, "logps/rejected": -405.60406494140625, "loss": 0.5086, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7327491044998169, "rewards/margins": 0.3788543939590454, "rewards/rejected": -1.1116034984588623, "step": 750 }, { "epoch": 0.3976975405546834, "grad_norm": 17.658078906056954, "learning_rate": 3.7697315618313644e-07, "logits/chosen": -2.7973134517669678, "logits/rejected": -2.7560336589813232, "logps/chosen": -279.4862365722656, "logps/rejected": -299.8984375, "loss": 0.5791, "rewards/accuracies": 0.75, "rewards/chosen": -0.5168331861495972, "rewards/margins": 0.5671908855438232, "rewards/rejected": -1.08402419090271, "step": 760 }, { "epoch": 0.40293040293040294, "grad_norm": 13.415561426957483, "learning_rate": 3.7301640635283584e-07, "logits/chosen": -2.7720260620117188, "logits/rejected": -2.748436450958252, "logps/chosen": -328.19390869140625, "logps/rejected": -405.95709228515625, "loss": 0.5757, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8876395225524902, "rewards/margins": 0.42753082513809204, "rewards/rejected": -1.315170168876648, "step": 770 }, { "epoch": 0.40816326530612246, "grad_norm": 14.769300667177983, "learning_rate": 3.6901857004211443e-07, "logits/chosen": -2.749990463256836, "logits/rejected": -2.7357590198516846, "logps/chosen": -313.04180908203125, "logps/rejected": -345.66485595703125, "loss": 0.5333, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7682701349258423, "rewards/margins": 0.6885534524917603, "rewards/rejected": -1.4568235874176025, "step": 780 }, { "epoch": 0.413396127681842, "grad_norm": 14.678645322251919, "learning_rate": 3.6498098249582444e-07, "logits/chosen": -2.7600743770599365, "logits/rejected": -2.767582416534424, "logps/chosen": -304.5693664550781, "logps/rejected": -396.30731201171875, "loss": 0.5435, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.837949275970459, "rewards/margins": 0.3060819208621979, "rewards/rejected": -1.144031047821045, "step": 790 }, { "epoch": 0.4186289900575615, "grad_norm": 19.043570676274012, "learning_rate": 3.6090499223540757e-07, "logits/chosen": -2.816871166229248, "logits/rejected": -2.819472551345825, "logps/chosen": -386.77410888671875, "logps/rejected": -432.9602966308594, "loss": 0.594, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9629328846931458, "rewards/margins": 0.41979989409446716, "rewards/rejected": -1.3827327489852905, "step": 800 }, { "epoch": 0.423861852433281, "grad_norm": 16.07598371652075, "learning_rate": 3.5679196060850034e-07, "logits/chosen": -2.774369478225708, "logits/rejected": -2.704817771911621, "logps/chosen": -381.45367431640625, "logps/rejected": -394.75054931640625, "loss": 0.5502, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8025716543197632, "rewards/margins": 0.5534734725952148, "rewards/rejected": -1.356045126914978, "step": 810 }, { "epoch": 0.4290947148090005, "grad_norm": 17.575598817541977, "learning_rate": 3.5264326133425464e-07, "logits/chosen": -2.808215856552124, "logits/rejected": -2.7602808475494385, "logps/chosen": -377.1565856933594, "logps/rejected": -380.73577880859375, "loss": 0.5746, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9608553051948547, "rewards/margins": 0.5722512006759644, "rewards/rejected": -1.5331064462661743, "step": 820 }, { "epoch": 0.43432757718472004, "grad_norm": 16.32115598655498, "learning_rate": 3.4846028004452693e-07, "logits/chosen": -2.8719420433044434, "logits/rejected": -2.8173866271972656, "logps/chosen": -307.88714599609375, "logps/rejected": -346.9542236328125, "loss": 0.6023, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7333047389984131, "rewards/margins": 0.4502180516719818, "rewards/rejected": -1.1835228204727173, "step": 830 }, { "epoch": 0.43956043956043955, "grad_norm": 16.91945418937996, "learning_rate": 3.4424441382108826e-07, "logits/chosen": -2.9018168449401855, "logits/rejected": -2.726928472518921, "logps/chosen": -491.023193359375, "logps/rejected": -441.81494140625, "loss": 0.5438, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9286457896232605, "rewards/margins": 0.6778661012649536, "rewards/rejected": -1.6065118312835693, "step": 840 }, { "epoch": 0.44479330193615907, "grad_norm": 21.188197365998956, "learning_rate": 3.399970707290105e-07, "logits/chosen": -2.784093141555786, "logits/rejected": -2.6797289848327637, "logps/chosen": -347.7838439941406, "logps/rejected": -357.4574279785156, "loss": 0.5766, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9829579591751099, "rewards/margins": 0.4461473822593689, "rewards/rejected": -1.4291054010391235, "step": 850 }, { "epoch": 0.4500261643118786, "grad_norm": 16.855672588469375, "learning_rate": 3.3571966934638376e-07, "logits/chosen": -2.818727731704712, "logits/rejected": -2.8157877922058105, "logps/chosen": -258.5130615234375, "logps/rejected": -407.8525085449219, "loss": 0.5452, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7257243394851685, "rewards/margins": 0.8329108953475952, "rewards/rejected": -1.5586349964141846, "step": 860 }, { "epoch": 0.4552590266875981, "grad_norm": 13.734802575828237, "learning_rate": 3.314136382905234e-07, "logits/chosen": -2.577549457550049, "logits/rejected": -2.634438991546631, "logps/chosen": -296.9052429199219, "logps/rejected": -361.5945739746094, "loss": 0.5641, "rewards/accuracies": 0.75, "rewards/chosen": -0.766598641872406, "rewards/margins": 0.615802526473999, "rewards/rejected": -1.3824012279510498, "step": 870 }, { "epoch": 0.4604918890633176, "grad_norm": 16.099032009321178, "learning_rate": 3.270804157408225e-07, "logits/chosen": -2.836336612701416, "logits/rejected": -2.819361925125122, "logps/chosen": -354.25347900390625, "logps/rejected": -366.732421875, "loss": 0.539, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8732249140739441, "rewards/margins": 0.35818585753440857, "rewards/rejected": -1.2314107418060303, "step": 880 }, { "epoch": 0.46572475143903713, "grad_norm": 17.106740036561465, "learning_rate": 3.227214489584128e-07, "logits/chosen": -2.8799033164978027, "logits/rejected": -2.8418822288513184, "logps/chosen": -386.0233459472656, "logps/rejected": -369.4678039550781, "loss": 0.5312, "rewards/accuracies": 0.75, "rewards/chosen": -0.7949320077896118, "rewards/margins": 0.5880209803581238, "rewards/rejected": -1.3829529285430908, "step": 890 }, { "epoch": 0.47095761381475665, "grad_norm": 21.890320275591346, "learning_rate": 3.1833819380279023e-07, "logits/chosen": -2.721087694168091, "logits/rejected": -2.697716474533081, "logps/chosen": -314.33087158203125, "logps/rejected": -464.30413818359375, "loss": 0.5303, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7493630051612854, "rewards/margins": 1.0829684734344482, "rewards/rejected": -1.8323314189910889, "step": 900 }, { "epoch": 0.47619047619047616, "grad_norm": 18.614163560193642, "learning_rate": 3.139321142455703e-07, "logits/chosen": -2.7572057247161865, "logits/rejected": -2.706200122833252, "logps/chosen": -257.91607666015625, "logps/rejected": -347.5008239746094, "loss": 0.577, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8465842008590698, "rewards/margins": 0.8367371559143066, "rewards/rejected": -1.683321237564087, "step": 910 }, { "epoch": 0.48142333856619574, "grad_norm": 15.269349106291124, "learning_rate": 3.095046818815331e-07, "logits/chosen": -2.8949315547943115, "logits/rejected": -2.79899525642395, "logps/chosen": -398.53765869140625, "logps/rejected": -398.479736328125, "loss": 0.5602, "rewards/accuracies": 0.75, "rewards/chosen": -0.9853115081787109, "rewards/margins": 0.5784602761268616, "rewards/rejected": -1.5637718439102173, "step": 920 }, { "epoch": 0.48665620094191525, "grad_norm": 15.355520412605172, "learning_rate": 3.0505737543712275e-07, "logits/chosen": -2.806957960128784, "logits/rejected": -2.785641670227051, "logps/chosen": -353.44854736328125, "logps/rejected": -384.927490234375, "loss": 0.5432, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9327161908149719, "rewards/margins": 0.3711306154727936, "rewards/rejected": -1.303847074508667, "step": 930 }, { "epoch": 0.49188906331763477, "grad_norm": 15.67157273698381, "learning_rate": 3.0059168027656475e-07, "logits/chosen": -2.888259172439575, "logits/rejected": -2.8196072578430176, "logps/chosen": -374.23443603515625, "logps/rejected": -388.24578857421875, "loss": 0.4706, "rewards/accuracies": 0.75, "rewards/chosen": -0.8297722935676575, "rewards/margins": 0.6955471038818359, "rewards/rejected": -1.5253194570541382, "step": 940 }, { "epoch": 0.4971219256933543, "grad_norm": 14.462678359824464, "learning_rate": 2.9610908790576663e-07, "logits/chosen": -2.7698135375976562, "logits/rejected": -2.661236524581909, "logps/chosen": -364.80810546875, "logps/rejected": -443.79425048828125, "loss": 0.5139, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7954778671264648, "rewards/margins": 0.9584125280380249, "rewards/rejected": -1.7538903951644897, "step": 950 }, { "epoch": 0.5023547880690737, "grad_norm": 13.595756965461069, "learning_rate": 2.9161109547416667e-07, "logits/chosen": -2.8679168224334717, "logits/rejected": -2.795522689819336, "logps/chosen": -391.12115478515625, "logps/rejected": -440.32940673828125, "loss": 0.5418, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1392987966537476, "rewards/margins": 0.6391605734825134, "rewards/rejected": -1.7784591913223267, "step": 960 }, { "epoch": 0.5075876504447933, "grad_norm": 17.796999860342627, "learning_rate": 2.8709920527469834e-07, "logits/chosen": -2.7307040691375732, "logits/rejected": -2.691157341003418, "logps/chosen": -373.8775939941406, "logps/rejected": -425.29400634765625, "loss": 0.5328, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8613603711128235, "rewards/margins": 0.9478839039802551, "rewards/rejected": -1.80924391746521, "step": 970 }, { "epoch": 0.5128205128205128, "grad_norm": 11.72784157089814, "learning_rate": 2.8257492424203685e-07, "logits/chosen": -2.8900365829467773, "logits/rejected": -2.731393337249756, "logps/chosen": -380.15576171875, "logps/rejected": -384.27630615234375, "loss": 0.53, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7285529971122742, "rewards/margins": 0.7510075569152832, "rewards/rejected": -1.4795606136322021, "step": 980 }, { "epoch": 0.5180533751962323, "grad_norm": 17.896633281401765, "learning_rate": 2.780397634492949e-07, "logits/chosen": -2.7340455055236816, "logits/rejected": -2.663761615753174, "logps/chosen": -312.6453552246094, "logps/rejected": -369.53790283203125, "loss": 0.5483, "rewards/accuracies": 0.75, "rewards/chosen": -0.68805330991745, "rewards/margins": 0.8142625093460083, "rewards/rejected": -1.5023157596588135, "step": 990 }, { "epoch": 0.5232862375719518, "grad_norm": 18.34859064941708, "learning_rate": 2.7349523760333674e-07, "logits/chosen": -2.7495155334472656, "logits/rejected": -2.6967289447784424, "logps/chosen": -327.76214599609375, "logps/rejected": -376.7933044433594, "loss": 0.5416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9751359820365906, "rewards/margins": 0.5826241970062256, "rewards/rejected": -1.55776047706604, "step": 1000 }, { "epoch": 0.5285190999476713, "grad_norm": 17.334821505661097, "learning_rate": 2.6894286453887827e-07, "logits/chosen": -2.7708683013916016, "logits/rejected": -2.763948440551758, "logps/chosen": -315.4136047363281, "logps/rejected": -405.6109924316406, "loss": 0.5349, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.835088849067688, "rewards/margins": 0.722664475440979, "rewards/rejected": -1.5577532052993774, "step": 1010 }, { "epoch": 0.533751962323391, "grad_norm": 16.461462996228413, "learning_rate": 2.6438416471154273e-07, "logits/chosen": -2.8052124977111816, "logits/rejected": -2.7714879512786865, "logps/chosen": -361.3446044921875, "logps/rejected": -400.1153564453125, "loss": 0.4871, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7003945112228394, "rewards/margins": 0.8514345288276672, "rewards/rejected": -1.5518289804458618, "step": 1020 }, { "epoch": 0.5389848246991105, "grad_norm": 16.993297156547936, "learning_rate": 2.598206606900406e-07, "logits/chosen": -2.812356472015381, "logits/rejected": -2.779200553894043, "logps/chosen": -351.882568359375, "logps/rejected": -349.3376159667969, "loss": 0.5472, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9720566868782043, "rewards/margins": 0.37396326661109924, "rewards/rejected": -1.3460201025009155, "step": 1030 }, { "epoch": 0.54421768707483, "grad_norm": 16.369305843640827, "learning_rate": 2.552538766476443e-07, "logits/chosen": -2.807950973510742, "logits/rejected": -2.8600311279296875, "logps/chosen": -336.7136535644531, "logps/rejected": -404.81646728515625, "loss": 0.5614, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8792396783828735, "rewards/margins": 0.5810363292694092, "rewards/rejected": -1.4602760076522827, "step": 1040 }, { "epoch": 0.5494505494505495, "grad_norm": 18.03446272194485, "learning_rate": 2.5068533785312666e-07, "logits/chosen": -2.873033046722412, "logits/rejected": -2.799868583679199, "logps/chosen": -392.97955322265625, "logps/rejected": -442.9043884277344, "loss": 0.5167, "rewards/accuracies": 0.875, "rewards/chosen": -0.6563512086868286, "rewards/margins": 0.8989084959030151, "rewards/rejected": -1.5552598237991333, "step": 1050 }, { "epoch": 0.554683411826269, "grad_norm": 20.315404295924612, "learning_rate": 2.461165701613333e-07, "logits/chosen": -2.759457588195801, "logits/rejected": -2.7653160095214844, "logps/chosen": -315.7008056640625, "logps/rejected": -420.513671875, "loss": 0.5364, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5269359946250916, "rewards/margins": 1.0427325963974, "rewards/rejected": -1.5696685314178467, "step": 1060 }, { "epoch": 0.5599162742019885, "grad_norm": 14.713699575864963, "learning_rate": 2.415490995035596e-07, "logits/chosen": -2.7564265727996826, "logits/rejected": -2.7757070064544678, "logps/chosen": -408.191162109375, "logps/rejected": -409.313232421875, "loss": 0.5296, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8357181549072266, "rewards/margins": 0.5712189674377441, "rewards/rejected": -1.4069370031356812, "step": 1070 }, { "epoch": 0.565149136577708, "grad_norm": 18.09014309365496, "learning_rate": 2.3698445137790258e-07, "logits/chosen": -2.8548378944396973, "logits/rejected": -2.7975635528564453, "logps/chosen": -307.1778869628906, "logps/rejected": -374.8013610839844, "loss": 0.5449, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7622523307800293, "rewards/margins": 0.7371785640716553, "rewards/rejected": -1.4994310140609741, "step": 1080 }, { "epoch": 0.5703819989534276, "grad_norm": 24.666086366933353, "learning_rate": 2.3242415033975575e-07, "logits/chosen": -2.7526750564575195, "logits/rejected": -2.6710307598114014, "logps/chosen": -406.1108093261719, "logps/rejected": -334.859375, "loss": 0.5465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0330530405044556, "rewards/margins": 0.43497997522354126, "rewards/rejected": -1.4680330753326416, "step": 1090 }, { "epoch": 0.5756148613291471, "grad_norm": 19.70750428042825, "learning_rate": 2.2786971949262134e-07, "logits/chosen": -2.7635788917541504, "logits/rejected": -2.740485668182373, "logps/chosen": -350.9692077636719, "logps/rejected": -427.8968811035156, "loss": 0.5149, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9213571548461914, "rewards/margins": 0.6530935764312744, "rewards/rejected": -1.5744506120681763, "step": 1100 }, { "epoch": 0.5808477237048666, "grad_norm": 19.421047147213937, "learning_rate": 2.2332267997940513e-07, "logits/chosen": -2.593143939971924, "logits/rejected": -2.5872962474823, "logps/chosen": -264.0029296875, "logps/rejected": -365.4161682128906, "loss": 0.561, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9193994402885437, "rewards/margins": 0.7727819681167603, "rewards/rejected": -1.6921813488006592, "step": 1110 }, { "epoch": 0.5860805860805861, "grad_norm": 20.25715075172959, "learning_rate": 2.1878455047436753e-07, "logits/chosen": -2.7330455780029297, "logits/rejected": -2.701078176498413, "logps/chosen": -377.7864074707031, "logps/rejected": -417.6946716308594, "loss": 0.5272, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0114750862121582, "rewards/margins": 0.6698529124259949, "rewards/rejected": -1.6813280582427979, "step": 1120 }, { "epoch": 0.5913134484563056, "grad_norm": 18.65929450263775, "learning_rate": 2.1425684667589852e-07, "logits/chosen": -2.640451669692993, "logits/rejected": -2.5956547260284424, "logps/chosen": -344.3413391113281, "logps/rejected": -445.24627685546875, "loss": 0.5213, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2718535661697388, "rewards/margins": 0.5820540189743042, "rewards/rejected": -1.8539073467254639, "step": 1130 }, { "epoch": 0.5965463108320251, "grad_norm": 23.91921863916153, "learning_rate": 2.0974108080028692e-07, "logits/chosen": -2.876276969909668, "logits/rejected": -2.8425920009613037, "logps/chosen": -358.00347900390625, "logps/rejected": -435.3857421875, "loss": 0.5311, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0057141780853271, "rewards/margins": 0.6711603403091431, "rewards/rejected": -1.6768745183944702, "step": 1140 }, { "epoch": 0.6017791732077447, "grad_norm": 18.381841775077824, "learning_rate": 2.0523876107665194e-07, "logits/chosen": -2.829072952270508, "logits/rejected": -2.6755383014678955, "logps/chosen": -359.2571105957031, "logps/rejected": -397.68743896484375, "loss": 0.5262, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9120863080024719, "rewards/margins": 0.7489384412765503, "rewards/rejected": -1.6610246896743774, "step": 1150 }, { "epoch": 0.6070120355834642, "grad_norm": 20.516414043729537, "learning_rate": 2.0075139124320787e-07, "logits/chosen": -2.6632769107818604, "logits/rejected": -2.6845157146453857, "logps/chosen": -317.9290771484375, "logps/rejected": -327.5867614746094, "loss": 0.553, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0191724300384521, "rewards/margins": 0.4334065020084381, "rewards/rejected": -1.452579140663147, "step": 1160 }, { "epoch": 0.6122448979591837, "grad_norm": 18.076421208226538, "learning_rate": 1.962804700450265e-07, "logits/chosen": -2.7555832862854004, "logits/rejected": -2.7026753425598145, "logps/chosen": -358.18212890625, "logps/rejected": -462.0071716308594, "loss": 0.5437, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.876019299030304, "rewards/margins": 1.0211801528930664, "rewards/rejected": -1.8971996307373047, "step": 1170 }, { "epoch": 0.6174777603349032, "grad_norm": 16.308484308025697, "learning_rate": 1.9182749073346943e-07, "logits/chosen": -2.806734085083008, "logits/rejected": -2.7705514430999756, "logps/chosen": -416.02520751953125, "logps/rejected": -418.12347412109375, "loss": 0.482, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0437430143356323, "rewards/margins": 0.47317224740982056, "rewards/rejected": -1.516915202140808, "step": 1180 }, { "epoch": 0.6227106227106227, "grad_norm": 19.5121441695378, "learning_rate": 1.8739394056745372e-07, "logits/chosen": -2.85368013381958, "logits/rejected": -2.792527675628662, "logps/chosen": -444.78948974609375, "logps/rejected": -417.74078369140625, "loss": 0.4903, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.825792670249939, "rewards/margins": 0.635454535484314, "rewards/rejected": -1.461247205734253, "step": 1190 }, { "epoch": 0.6279434850863422, "grad_norm": 16.627989570364402, "learning_rate": 1.8298130031671972e-07, "logits/chosen": -2.568850517272949, "logits/rejected": -2.4875073432922363, "logps/chosen": -408.0639953613281, "logps/rejected": -431.19329833984375, "loss": 0.5083, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0868682861328125, "rewards/margins": 0.5747972726821899, "rewards/rejected": -1.6616655588150024, "step": 1200 }, { "epoch": 0.6331763474620618, "grad_norm": 20.140246761449887, "learning_rate": 1.785910437672658e-07, "logits/chosen": -2.8672873973846436, "logits/rejected": -2.822535514831543, "logps/chosen": -386.6443786621094, "logps/rejected": -416.11328125, "loss": 0.5426, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.953647792339325, "rewards/margins": 0.6560716032981873, "rewards/rejected": -1.6097195148468018, "step": 1210 }, { "epoch": 0.6384092098377813, "grad_norm": 20.20724844042649, "learning_rate": 1.7422463722911624e-07, "logits/chosen": -2.8543591499328613, "logits/rejected": -2.8314263820648193, "logps/chosen": -403.2071228027344, "logps/rejected": -456.78924560546875, "loss": 0.5261, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.862319827079773, "rewards/margins": 0.9367027282714844, "rewards/rejected": -1.7990226745605469, "step": 1220 }, { "epoch": 0.6436420722135008, "grad_norm": 17.62902811045846, "learning_rate": 1.6988353904658492e-07, "logits/chosen": -2.796889543533325, "logits/rejected": -2.7177813053131104, "logps/chosen": -430.34515380859375, "logps/rejected": -412.7137145996094, "loss": 0.4988, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9104453921318054, "rewards/margins": 0.9096380472183228, "rewards/rejected": -1.8200836181640625, "step": 1230 }, { "epoch": 0.6488749345892203, "grad_norm": 20.291430758326484, "learning_rate": 1.6556919911120081e-07, "logits/chosen": -2.7235121726989746, "logits/rejected": -2.704380512237549, "logps/chosen": -316.89495849609375, "logps/rejected": -354.3640441894531, "loss": 0.5081, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8046097755432129, "rewards/margins": 0.6576355695724487, "rewards/rejected": -1.4622454643249512, "step": 1240 }, { "epoch": 0.6541077969649398, "grad_norm": 16.634847998265094, "learning_rate": 1.6128305837745546e-07, "logits/chosen": -2.8713958263397217, "logits/rejected": -2.780726909637451, "logps/chosen": -357.17352294921875, "logps/rejected": -450.5567321777344, "loss": 0.5163, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8796365857124329, "rewards/margins": 0.8217870593070984, "rewards/rejected": -1.7014236450195312, "step": 1250 }, { "epoch": 0.6593406593406593, "grad_norm": 13.440845355698691, "learning_rate": 1.570265483815364e-07, "logits/chosen": -2.7988827228546143, "logits/rejected": -2.7722063064575195, "logps/chosen": -336.47259521484375, "logps/rejected": -320.25933837890625, "loss": 0.5074, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7505493760108948, "rewards/margins": 0.728230357170105, "rewards/rejected": -1.4787797927856445, "step": 1260 }, { "epoch": 0.6645735217163788, "grad_norm": 17.012464455361766, "learning_rate": 1.5280109076320506e-07, "logits/chosen": -2.7736287117004395, "logits/rejected": -2.7175097465515137, "logps/chosen": -305.7979736328125, "logps/rejected": -366.69677734375, "loss": 0.5134, "rewards/accuracies": 0.875, "rewards/chosen": -0.756097137928009, "rewards/margins": 0.8465806841850281, "rewards/rejected": -1.6026777029037476, "step": 1270 }, { "epoch": 0.6698063840920984, "grad_norm": 20.946223504777155, "learning_rate": 1.4860809679098158e-07, "logits/chosen": -2.7644202709198, "logits/rejected": -2.674136161804199, "logps/chosen": -328.1389465332031, "logps/rejected": -375.33331298828125, "loss": 0.5189, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8715450167655945, "rewards/margins": 0.7798687219619751, "rewards/rejected": -1.6514136791229248, "step": 1280 }, { "epoch": 0.6750392464678179, "grad_norm": 16.38862564822018, "learning_rate": 1.444489668907914e-07, "logits/chosen": -2.688934087753296, "logits/rejected": -2.6829075813293457, "logps/chosen": -314.0862121582031, "logps/rejected": -436.2018127441406, "loss": 0.5481, "rewards/accuracies": 0.75, "rewards/chosen": -0.7365253567695618, "rewards/margins": 0.8902775645256042, "rewards/rejected": -1.6268030405044556, "step": 1290 }, { "epoch": 0.6802721088435374, "grad_norm": 18.868265740047633, "learning_rate": 1.403250901782354e-07, "logits/chosen": -2.7281813621520996, "logits/rejected": -2.748370409011841, "logps/chosen": -362.42266845703125, "logps/rejected": -434.9849548339844, "loss": 0.5109, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9654933214187622, "rewards/margins": 0.6172199249267578, "rewards/rejected": -1.58271324634552, "step": 1300 }, { "epoch": 0.6855049712192569, "grad_norm": 22.766524494989497, "learning_rate": 1.3623784399463584e-07, "logits/chosen": -2.8341379165649414, "logits/rejected": -2.8009238243103027, "logps/chosen": -321.3329772949219, "logps/rejected": -359.7023010253906, "loss": 0.4985, "rewards/accuracies": 0.875, "rewards/chosen": -0.7877645492553711, "rewards/margins": 0.7746297717094421, "rewards/rejected": -1.562394380569458, "step": 1310 }, { "epoch": 0.6907378335949764, "grad_norm": 18.14832280420879, "learning_rate": 1.3218859344701632e-07, "logits/chosen": -2.7510125637054443, "logits/rejected": -2.7323222160339355, "logps/chosen": -392.82965087890625, "logps/rejected": -459.291015625, "loss": 0.4874, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0846540927886963, "rewards/margins": 0.6299933195114136, "rewards/rejected": -1.7146475315093994, "step": 1320 }, { "epoch": 0.6959706959706959, "grad_norm": 22.852759896856153, "learning_rate": 1.2817869095216624e-07, "logits/chosen": -2.7460663318634033, "logits/rejected": -2.7363333702087402, "logps/chosen": -344.58636474609375, "logps/rejected": -462.2234802246094, "loss": 0.4831, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7783873677253723, "rewards/margins": 0.8798893094062805, "rewards/rejected": -1.6582765579223633, "step": 1330 }, { "epoch": 0.7012035583464155, "grad_norm": 23.6570903287471, "learning_rate": 1.2420947578494522e-07, "logits/chosen": -2.689542293548584, "logits/rejected": -2.6176483631134033, "logps/chosen": -351.36468505859375, "logps/rejected": -383.5470275878906, "loss": 0.5132, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9131054878234863, "rewards/margins": 0.8674660921096802, "rewards/rejected": -1.7805715799331665, "step": 1340 }, { "epoch": 0.706436420722135, "grad_norm": 24.783115737011485, "learning_rate": 1.202822736309758e-07, "logits/chosen": -2.7429165840148926, "logits/rejected": -2.718522787094116, "logps/chosen": -374.50323486328125, "logps/rejected": -395.9026794433594, "loss": 0.5146, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9068658947944641, "rewards/margins": 0.5722583532333374, "rewards/rejected": -1.4791243076324463, "step": 1350 }, { "epoch": 0.7116692830978545, "grad_norm": 16.39264775799894, "learning_rate": 1.1639839614387572e-07, "logits/chosen": -2.6672797203063965, "logits/rejected": -2.646698474884033, "logps/chosen": -431.44305419921875, "logps/rejected": -459.53387451171875, "loss": 0.5512, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0058305263519287, "rewards/margins": 0.6214712262153625, "rewards/rejected": -1.627301573753357, "step": 1360 }, { "epoch": 0.716902145473574, "grad_norm": 21.609096898964978, "learning_rate": 1.1255914050717552e-07, "logits/chosen": -2.791228771209717, "logits/rejected": -2.6643431186676025, "logps/chosen": -397.3729553222656, "logps/rejected": -365.84893798828125, "loss": 0.4799, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8865281343460083, "rewards/margins": 0.6994263529777527, "rewards/rejected": -1.5859544277191162, "step": 1370 }, { "epoch": 0.7221350078492935, "grad_norm": 18.052304623231368, "learning_rate": 1.0876578900107053e-07, "logits/chosen": -2.771291494369507, "logits/rejected": -2.7225091457366943, "logps/chosen": -280.9075622558594, "logps/rejected": -402.49908447265625, "loss": 0.4933, "rewards/accuracies": 0.875, "rewards/chosen": -0.6819049119949341, "rewards/margins": 0.9486406445503235, "rewards/rejected": -1.6305453777313232, "step": 1380 }, { "epoch": 0.727367870225013, "grad_norm": 19.948472450962296, "learning_rate": 1.050196085741491e-07, "logits/chosen": -2.708111047744751, "logits/rejected": -2.616854667663574, "logps/chosen": -350.962158203125, "logps/rejected": -393.85357666015625, "loss": 0.5001, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9579183459281921, "rewards/margins": 0.8560064435005188, "rewards/rejected": -1.81392502784729, "step": 1390 }, { "epoch": 0.7326007326007326, "grad_norm": 19.160243750600223, "learning_rate": 1.0132185042024246e-07, "logits/chosen": -2.6855554580688477, "logits/rejected": -2.6964261531829834, "logps/chosen": -345.172119140625, "logps/rejected": -429.0968322753906, "loss": 0.5215, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.147552728652954, "rewards/margins": 0.7010320425033569, "rewards/rejected": -1.848584532737732, "step": 1400 }, { "epoch": 0.7378335949764521, "grad_norm": 20.042088035388982, "learning_rate": 9.767374956053584e-08, "logits/chosen": -2.6926827430725098, "logits/rejected": -2.65360689163208, "logps/chosen": -353.872314453125, "logps/rejected": -421.33447265625, "loss": 0.5652, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.064784049987793, "rewards/margins": 0.7993738055229187, "rewards/rejected": -1.864158034324646, "step": 1410 }, { "epoch": 0.7430664573521716, "grad_norm": 21.422151300583028, "learning_rate": 9.407652443108192e-08, "logits/chosen": -2.754948616027832, "logits/rejected": -2.6873762607574463, "logps/chosen": -410.3388671875, "logps/rejected": -412.1189880371094, "loss": 0.5796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0343502759933472, "rewards/margins": 0.6344264149665833, "rewards/rejected": -1.668776512145996, "step": 1420 }, { "epoch": 0.7482993197278912, "grad_norm": 15.10072418425115, "learning_rate": 9.053137647585229e-08, "logits/chosen": -2.7673416137695312, "logits/rejected": -2.683150053024292, "logps/chosen": -380.3271179199219, "logps/rejected": -419.106201171875, "loss": 0.4956, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8902686834335327, "rewards/margins": 0.8359274864196777, "rewards/rejected": -1.7261962890625, "step": 1430 }, { "epoch": 0.7535321821036107, "grad_norm": 20.007719817569754, "learning_rate": 8.70394897454659e-08, "logits/chosen": -2.7903285026550293, "logits/rejected": -2.726536750793457, "logps/chosen": -412.11669921875, "logps/rejected": -453.485107421875, "loss": 0.5241, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6117042899131775, "rewards/margins": 0.9241981506347656, "rewards/rejected": -1.5359022617340088, "step": 1440 }, { "epoch": 0.7587650444793302, "grad_norm": 15.725739369442051, "learning_rate": 8.360203050172488e-08, "logits/chosen": -2.761046886444092, "logits/rejected": -2.6572835445404053, "logps/chosen": -380.8763427734375, "logps/rejected": -428.72601318359375, "loss": 0.5176, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9222623705863953, "rewards/margins": 0.817249596118927, "rewards/rejected": -1.7395120859146118, "step": 1450 }, { "epoch": 0.7639979068550498, "grad_norm": 15.913216171443693, "learning_rate": 8.022014682809305e-08, "logits/chosen": -2.680180072784424, "logits/rejected": -2.6794424057006836, "logps/chosen": -297.6555480957031, "logps/rejected": -357.39703369140625, "loss": 0.5246, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8991565704345703, "rewards/margins": 0.5045825242996216, "rewards/rejected": -1.4037392139434814, "step": 1460 }, { "epoch": 0.7692307692307693, "grad_norm": 16.251635097636957, "learning_rate": 7.689496824624525e-08, "logits/chosen": -2.7646737098693848, "logits/rejected": -2.642939567565918, "logps/chosen": -400.4357604980469, "logps/rejected": -399.53228759765625, "loss": 0.5194, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7477626204490662, "rewards/margins": 0.9188524484634399, "rewards/rejected": -1.6666151285171509, "step": 1470 }, { "epoch": 0.7744636316064888, "grad_norm": 18.848669382089422, "learning_rate": 7.362760533881649e-08, "logits/chosen": -2.682922124862671, "logits/rejected": -2.678013324737549, "logps/chosen": -340.21295166015625, "logps/rejected": -394.0873107910156, "loss": 0.5165, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9958998560905457, "rewards/margins": 0.7448866367340088, "rewards/rejected": -1.7407863140106201, "step": 1480 }, { "epoch": 0.7796964939822083, "grad_norm": 18.023386643013044, "learning_rate": 7.041914937847584e-08, "logits/chosen": -2.489797592163086, "logits/rejected": -2.509646415710449, "logps/chosen": -401.4407653808594, "logps/rejected": -446.206298828125, "loss": 0.5291, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2514934539794922, "rewards/margins": 0.5801483392715454, "rewards/rejected": -1.8316421508789062, "step": 1490 }, { "epoch": 0.7849293563579278, "grad_norm": 16.75491775995985, "learning_rate": 6.727067196345099e-08, "logits/chosen": -2.6368460655212402, "logits/rejected": -2.5974481105804443, "logps/chosen": -296.484130859375, "logps/rejected": -450.65826416015625, "loss": 0.4935, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9263995289802551, "rewards/margins": 1.0427716970443726, "rewards/rejected": -1.9691712856292725, "step": 1500 }, { "epoch": 0.7901622187336473, "grad_norm": 19.21477092868735, "learning_rate": 6.418322465962233e-08, "logits/chosen": -2.6289784908294678, "logits/rejected": -2.6378707885742188, "logps/chosen": -389.85357666015625, "logps/rejected": -506.17926025390625, "loss": 0.5505, "rewards/accuracies": 0.625, "rewards/chosen": -1.3396786451339722, "rewards/margins": 0.8516691327095032, "rewards/rejected": -2.191347599029541, "step": 1510 }, { "epoch": 0.7953950811093669, "grad_norm": 18.1853712960554, "learning_rate": 6.115783864930905e-08, "logits/chosen": -2.6589932441711426, "logits/rejected": -2.61903715133667, "logps/chosen": -309.13134765625, "logps/rejected": -423.0372619628906, "loss": 0.5275, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9086647033691406, "rewards/margins": 0.8935861587524414, "rewards/rejected": -1.802250862121582, "step": 1520 }, { "epoch": 0.8006279434850864, "grad_norm": 21.592413720973216, "learning_rate": 5.8195524386862374e-08, "logits/chosen": -2.820551872253418, "logits/rejected": -2.7724428176879883, "logps/chosen": -423.28363037109375, "logps/rejected": -555.06298828125, "loss": 0.4936, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1178683042526245, "rewards/margins": 0.7847362756729126, "rewards/rejected": -1.9026046991348267, "step": 1530 }, { "epoch": 0.8058608058608059, "grad_norm": 16.018713976629193, "learning_rate": 5.529727126118228e-08, "logits/chosen": -2.716298818588257, "logits/rejected": -2.724658489227295, "logps/chosen": -468.96856689453125, "logps/rejected": -467.7854919433594, "loss": 0.5403, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1532551050186157, "rewards/margins": 0.45239201188087463, "rewards/rejected": -1.605647325515747, "step": 1540 }, { "epoch": 0.8110936682365254, "grad_norm": 14.945499187412246, "learning_rate": 5.246404726526918e-08, "logits/chosen": -2.677903652191162, "logits/rejected": -2.5905513763427734, "logps/chosen": -396.8397521972656, "logps/rejected": -377.557861328125, "loss": 0.5017, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7912027835845947, "rewards/margins": 0.7971407771110535, "rewards/rejected": -1.588343858718872, "step": 1550 }, { "epoch": 0.8163265306122449, "grad_norm": 15.362047445414715, "learning_rate": 4.969679867292276e-08, "logits/chosen": -2.6830527782440186, "logits/rejected": -2.6041407585144043, "logps/chosen": -436.54473876953125, "logps/rejected": -448.3851623535156, "loss": 0.5104, "rewards/accuracies": 0.75, "rewards/chosen": -0.9174336194992065, "rewards/margins": 0.7332735061645508, "rewards/rejected": -1.6507068872451782, "step": 1560 }, { "epoch": 0.8215593929879644, "grad_norm": 17.576121150498565, "learning_rate": 4.6996449722693315e-08, "logits/chosen": -2.6931352615356445, "logits/rejected": -2.6350607872009277, "logps/chosen": -315.1622619628906, "logps/rejected": -392.06292724609375, "loss": 0.515, "rewards/accuracies": 0.75, "rewards/chosen": -0.8371334075927734, "rewards/margins": 0.6744937896728516, "rewards/rejected": -1.5116270780563354, "step": 1570 }, { "epoch": 0.826792255363684, "grad_norm": 19.810352695452664, "learning_rate": 4.436390230919465e-08, "logits/chosen": -2.766540050506592, "logits/rejected": -2.647644519805908, "logps/chosen": -376.6566467285156, "logps/rejected": -402.571533203125, "loss": 0.5633, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9764121770858765, "rewards/margins": 0.7432926297187805, "rewards/rejected": -1.7197048664093018, "step": 1580 }, { "epoch": 0.8320251177394035, "grad_norm": 23.05429981365939, "learning_rate": 4.180003568187776e-08, "logits/chosen": -2.5337836742401123, "logits/rejected": -2.474341869354248, "logps/chosen": -308.1025390625, "logps/rejected": -385.6150817871094, "loss": 0.5154, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7418735027313232, "rewards/margins": 1.1077954769134521, "rewards/rejected": -1.849669098854065, "step": 1590 }, { "epoch": 0.837257980115123, "grad_norm": 16.3861829287344, "learning_rate": 3.930570615136919e-08, "logits/chosen": -2.568213939666748, "logits/rejected": -2.600006103515625, "logps/chosen": -364.5643615722656, "logps/rejected": -459.32794189453125, "loss": 0.5075, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1484493017196655, "rewards/margins": 0.7362042665481567, "rewards/rejected": -1.8846536874771118, "step": 1600 }, { "epoch": 0.8424908424908425, "grad_norm": 19.191362805490126, "learning_rate": 3.6881746803469756e-08, "logits/chosen": -2.7753946781158447, "logits/rejected": -2.6937546730041504, "logps/chosen": -451.4925842285156, "logps/rejected": -490.1671447753906, "loss": 0.552, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8986201286315918, "rewards/margins": 0.839145839214325, "rewards/rejected": -1.737765908241272, "step": 1610 }, { "epoch": 0.847723704866562, "grad_norm": 13.924933587722114, "learning_rate": 3.452896722091128e-08, "logits/chosen": -2.6632466316223145, "logits/rejected": -2.57369327545166, "logps/chosen": -392.18756103515625, "logps/rejected": -403.74053955078125, "loss": 0.5298, "rewards/accuracies": 0.625, "rewards/chosen": -1.1617246866226196, "rewards/margins": 0.6037346124649048, "rewards/rejected": -1.7654592990875244, "step": 1620 }, { "epoch": 0.8529565672422815, "grad_norm": 20.923127069029967, "learning_rate": 3.2248153212961677e-08, "logits/chosen": -2.776475429534912, "logits/rejected": -2.7884275913238525, "logps/chosen": -319.9564514160156, "logps/rejected": -403.90985107421875, "loss": 0.5115, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.85566246509552, "rewards/margins": 0.800037682056427, "rewards/rejected": -1.6557003259658813, "step": 1630 }, { "epoch": 0.858189429618001, "grad_norm": 17.71228621644989, "learning_rate": 3.004006655297209e-08, "logits/chosen": -2.704780340194702, "logits/rejected": -2.690717935562134, "logps/chosen": -387.93804931640625, "logps/rejected": -438.994873046875, "loss": 0.5445, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8455182909965515, "rewards/margins": 0.7660375833511353, "rewards/rejected": -1.611555814743042, "step": 1640 }, { "epoch": 0.8634222919937206, "grad_norm": 17.25547789651305, "learning_rate": 2.7905444723949762e-08, "logits/chosen": -2.635859251022339, "logits/rejected": -2.592531681060791, "logps/chosen": -379.08502197265625, "logps/rejected": -430.8294372558594, "loss": 0.5025, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0563156604766846, "rewards/margins": 0.5091310739517212, "rewards/rejected": -1.5654467344284058, "step": 1650 }, { "epoch": 0.8686551543694401, "grad_norm": 17.75386295782151, "learning_rate": 2.5845000672245572e-08, "logits/chosen": -2.652233839035034, "logits/rejected": -2.6020119190216064, "logps/chosen": -291.4010314941406, "logps/rejected": -421.83251953125, "loss": 0.5178, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8415043950080872, "rewards/margins": 1.0191437005996704, "rewards/rejected": -1.8606479167938232, "step": 1660 }, { "epoch": 0.8738880167451596, "grad_norm": 20.26581048622113, "learning_rate": 2.385942256943499e-08, "logits/chosen": -2.7823240756988525, "logits/rejected": -2.7179102897644043, "logps/chosen": -368.48687744140625, "logps/rejected": -421.83831787109375, "loss": 0.5155, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1159892082214355, "rewards/margins": 0.6273930668830872, "rewards/rejected": -1.743382215499878, "step": 1670 }, { "epoch": 0.8791208791208791, "grad_norm": 17.254193295992085, "learning_rate": 2.194937358247506e-08, "logits/chosen": -2.6762166023254395, "logits/rejected": -2.680424213409424, "logps/chosen": -341.16790771484375, "logps/rejected": -436.9229431152344, "loss": 0.4885, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9166282415390015, "rewards/margins": 0.8245986104011536, "rewards/rejected": -1.7412269115447998, "step": 1680 }, { "epoch": 0.8843537414965986, "grad_norm": 21.18803904087079, "learning_rate": 2.011549165221127e-08, "logits/chosen": -2.5892271995544434, "logits/rejected": -2.5530495643615723, "logps/chosen": -343.75274658203125, "logps/rejected": -402.83343505859375, "loss": 0.4921, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9632428288459778, "rewards/margins": 0.8480204343795776, "rewards/rejected": -1.8112634420394897, "step": 1690 }, { "epoch": 0.8895866038723181, "grad_norm": 18.038523734567626, "learning_rate": 1.8358389280311303e-08, "logits/chosen": -2.706275463104248, "logits/rejected": -2.649019718170166, "logps/chosen": -383.904296875, "logps/rejected": -418.57098388671875, "loss": 0.5314, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.1623432636260986, "rewards/margins": 0.5158635377883911, "rewards/rejected": -1.6782068014144897, "step": 1700 }, { "epoch": 0.8948194662480377, "grad_norm": 19.626755939860992, "learning_rate": 1.6678653324693787e-08, "logits/chosen": -2.744741916656494, "logits/rejected": -2.661057710647583, "logps/chosen": -415.6158142089844, "logps/rejected": -489.20001220703125, "loss": 0.5109, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9210155606269836, "rewards/margins": 0.8741633296012878, "rewards/rejected": -1.795178771018982, "step": 1710 }, { "epoch": 0.9000523286237572, "grad_norm": 19.57980158791374, "learning_rate": 1.507684480352292e-08, "logits/chosen": -2.717519998550415, "logits/rejected": -2.6023640632629395, "logps/chosen": -398.86126708984375, "logps/rejected": -399.9064636230469, "loss": 0.5261, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1560735702514648, "rewards/margins": 0.4242979884147644, "rewards/rejected": -1.580371618270874, "step": 1720 }, { "epoch": 0.9052851909994767, "grad_norm": 16.818277672769657, "learning_rate": 1.3553498707832761e-08, "logits/chosen": -2.6509828567504883, "logits/rejected": -2.614760637283325, "logps/chosen": -306.6497802734375, "logps/rejected": -347.5115051269531, "loss": 0.4896, "rewards/accuracies": 0.75, "rewards/chosen": -0.8303035497665405, "rewards/margins": 0.8390544652938843, "rewards/rejected": -1.6693580150604248, "step": 1730 }, { "epoch": 0.9105180533751962, "grad_norm": 17.711495458783695, "learning_rate": 1.2109123822844653e-08, "logits/chosen": -2.7017340660095215, "logits/rejected": -2.539865732192993, "logps/chosen": -346.7171936035156, "logps/rejected": -381.2705383300781, "loss": 0.5339, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.024086356163025, "rewards/margins": 0.7089843153953552, "rewards/rejected": -1.7330706119537354, "step": 1740 }, { "epoch": 0.9157509157509157, "grad_norm": 16.25718099914919, "learning_rate": 1.0744202558037014e-08, "logits/chosen": -2.7817070484161377, "logits/rejected": -2.7584192752838135, "logps/chosen": -422.7364196777344, "logps/rejected": -449.0599060058594, "loss": 0.5541, "rewards/accuracies": 0.75, "rewards/chosen": -1.0160216093063354, "rewards/margins": 0.6888941526412964, "rewards/rejected": -1.7049156427383423, "step": 1750 }, { "epoch": 0.9209837781266352, "grad_norm": 19.79616779504436, "learning_rate": 9.459190786024696e-09, "logits/chosen": -2.729013442993164, "logits/rejected": -2.692884683609009, "logps/chosen": -314.32562255859375, "logps/rejected": -348.45233154296875, "loss": 0.4649, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9323883056640625, "rewards/margins": 0.5154863595962524, "rewards/rejected": -1.4478747844696045, "step": 1760 }, { "epoch": 0.9262166405023547, "grad_norm": 15.466044673164172, "learning_rate": 8.254517690300944e-09, "logits/chosen": -2.5260634422302246, "logits/rejected": -2.4602036476135254, "logps/chosen": -379.27996826171875, "logps/rejected": -432.5758361816406, "loss": 0.4982, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.931941032409668, "rewards/margins": 0.9537526965141296, "rewards/rejected": -1.8856935501098633, "step": 1770 }, { "epoch": 0.9314495028780743, "grad_norm": 14.105513003072497, "learning_rate": 7.130585621893809e-09, "logits/chosen": -2.612941026687622, "logits/rejected": -2.601433277130127, "logps/chosen": -337.0567932128906, "logps/rejected": -356.3701477050781, "loss": 0.5522, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0178024768829346, "rewards/margins": 0.5061073303222656, "rewards/rejected": -1.5239098072052002, "step": 1780 }, { "epoch": 0.9366823652537938, "grad_norm": 17.67412326556611, "learning_rate": 6.0877699649840574e-09, "logits/chosen": -2.676441192626953, "logits/rejected": -2.6985154151916504, "logps/chosen": -410.93975830078125, "logps/rejected": -474.650390625, "loss": 0.5175, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8644577860832214, "rewards/margins": 0.6866751313209534, "rewards/rejected": -1.5511329174041748, "step": 1790 }, { "epoch": 0.9419152276295133, "grad_norm": 21.08771075280092, "learning_rate": 5.126419011529992e-09, "logits/chosen": -2.514744997024536, "logits/rejected": -2.462218999862671, "logps/chosen": -340.0183410644531, "logps/rejected": -472.80145263671875, "loss": 0.5, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1128913164138794, "rewards/margins": 1.0394203662872314, "rewards/rejected": -2.1523118019104004, "step": 1800 }, { "epoch": 0.9471480900052328, "grad_norm": 20.973025190997458, "learning_rate": 4.246853844940723e-09, "logits/chosen": -2.711920738220215, "logits/rejected": -2.6448938846588135, "logps/chosen": -358.5855407714844, "logps/rejected": -377.30828857421875, "loss": 0.5545, "rewards/accuracies": 0.75, "rewards/chosen": -1.0431839227676392, "rewards/margins": 0.5513032078742981, "rewards/rejected": -1.5944870710372925, "step": 1810 }, { "epoch": 0.9523809523809523, "grad_norm": 19.679618715790966, "learning_rate": 3.449368232836869e-09, "logits/chosen": -2.5656230449676514, "logits/rejected": -2.5163912773132324, "logps/chosen": -295.70684814453125, "logps/rejected": -333.41778564453125, "loss": 0.5332, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8301293253898621, "rewards/margins": 0.7227026224136353, "rewards/rejected": -1.5528318881988525, "step": 1820 }, { "epoch": 0.957613814756672, "grad_norm": 23.243692564813756, "learning_rate": 2.734228528934679e-09, "logits/chosen": -2.750614881515503, "logits/rejected": -2.7145471572875977, "logps/chosen": -371.66656494140625, "logps/rejected": -392.2098693847656, "loss": 0.5439, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9127969741821289, "rewards/margins": 0.6625674366950989, "rewards/rejected": -1.575364351272583, "step": 1830 }, { "epoch": 0.9628466771323915, "grad_norm": 19.814979577148716, "learning_rate": 2.1016735840859447e-09, "logits/chosen": -2.708528995513916, "logits/rejected": -2.5766711235046387, "logps/chosen": -431.52508544921875, "logps/rejected": -454.4827575683594, "loss": 0.5108, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0717358589172363, "rewards/margins": 0.8303524255752563, "rewards/rejected": -1.9020881652832031, "step": 1840 }, { "epoch": 0.968079539508111, "grad_norm": 17.66585714924501, "learning_rate": 1.551914666503812e-09, "logits/chosen": -2.656337261199951, "logits/rejected": -2.6318295001983643, "logps/chosen": -453.3070373535156, "logps/rejected": -438.5719299316406, "loss": 0.5218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8247343897819519, "rewards/margins": 0.6178687810897827, "rewards/rejected": -1.4426031112670898, "step": 1850 }, { "epoch": 0.9733124018838305, "grad_norm": 19.12046100454679, "learning_rate": 1.0851353912008642e-09, "logits/chosen": -2.67498779296875, "logits/rejected": -2.5374255180358887, "logps/chosen": -420.9612731933594, "logps/rejected": -403.1297302246094, "loss": 0.5131, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0306751728057861, "rewards/margins": 0.6978103518486023, "rewards/rejected": -1.7284857034683228, "step": 1860 }, { "epoch": 0.97854526425955, "grad_norm": 23.698665321678508, "learning_rate": 7.014916586632336e-10, "logits/chosen": -2.6748759746551514, "logits/rejected": -2.589711904525757, "logps/chosen": -330.1553649902344, "logps/rejected": -370.8642578125, "loss": 0.5288, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8707270622253418, "rewards/margins": 0.5380834341049194, "rewards/rejected": -1.4088103771209717, "step": 1870 }, { "epoch": 0.9837781266352695, "grad_norm": 14.404582481778125, "learning_rate": 4.011116027811956e-10, "logits/chosen": -2.7190022468566895, "logits/rejected": -2.7610325813293457, "logps/chosen": -337.45513916015625, "logps/rejected": -476.40069580078125, "loss": 0.505, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.878827691078186, "rewards/margins": 0.7724133133888245, "rewards/rejected": -1.6512409448623657, "step": 1880 }, { "epoch": 0.989010989010989, "grad_norm": 20.80788365647563, "learning_rate": 1.840955480532924e-10, "logits/chosen": -2.709712505340576, "logits/rejected": -2.666142225265503, "logps/chosen": -482.58636474609375, "logps/rejected": -484.43060302734375, "loss": 0.513, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9788215756416321, "rewards/margins": 0.6121380925178528, "rewards/rejected": -1.5909597873687744, "step": 1890 }, { "epoch": 0.9942438513867086, "grad_norm": 19.04109274522655, "learning_rate": 5.051597607894087e-11, "logits/chosen": -2.6989779472351074, "logits/rejected": -2.5977814197540283, "logps/chosen": -321.80120849609375, "logps/rejected": -401.3192138671875, "loss": 0.5193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0180636644363403, "rewards/margins": 0.8546449542045593, "rewards/rejected": -1.8727085590362549, "step": 1900 }, { "epoch": 0.9994767137624281, "grad_norm": 21.903615655450324, "learning_rate": 4.1750135001961117e-13, "logits/chosen": -2.6709775924682617, "logits/rejected": -2.6583666801452637, "logps/chosen": -406.5054626464844, "logps/rejected": -506.41973876953125, "loss": 0.5064, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8070088624954224, "rewards/margins": 1.0546354055404663, "rewards/rejected": -1.8616443872451782, "step": 1910 }, { "epoch": 1.0, "step": 1911, "total_flos": 0.0, "train_loss": 0.5618298566509827, "train_runtime": 11326.5504, "train_samples_per_second": 5.397, "train_steps_per_second": 0.169 } ], "logging_steps": 10, "max_steps": 1911, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }