{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999333733093477, "eval_steps": 400, "global_step": 469, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0021320541008728097, "grad_norm": 4.17070478980581, "learning_rate": 1.0638297872340425e-08, "logits/chosen": -0.4388880133628845, "logits/rejected": -0.6813962459564209, "logps/chosen": -137.1171112060547, "logps/rejected": -114.13969421386719, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.010660270504364048, "grad_norm": 3.7299717491618436, "learning_rate": 5.3191489361702123e-08, "logits/chosen": -0.4889238774776459, "logits/rejected": -0.6665000319480896, "logps/chosen": -169.8695068359375, "logps/rejected": -153.95947265625, "loss": 0.6932, "rewards/accuracies": 0.3671875, "rewards/chosen": 0.00029664667090401053, "rewards/margins": -0.00023018479987513274, "rewards/rejected": 0.0005268314271233976, "step": 5 }, { "epoch": 0.021320541008728097, "grad_norm": 3.95978205732512, "learning_rate": 1.0638297872340425e-07, "logits/chosen": -0.46806925535202026, "logits/rejected": -0.6404483318328857, "logps/chosen": -160.8107147216797, "logps/rejected": -149.25921630859375, "loss": 0.6928, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.0006372839561663568, "rewards/margins": 0.0015358469681814313, "rewards/rejected": -0.0008985629538074136, "step": 10 }, { "epoch": 0.03198081151309214, "grad_norm": 4.070738919050114, "learning_rate": 1.5957446808510638e-07, "logits/chosen": -0.5198644399642944, "logits/rejected": -0.7026724219322205, "logps/chosen": -148.3934783935547, "logps/rejected": -137.8568878173828, "loss": 0.6932, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.00037692085606977344, "rewards/margins": 9.87994353636168e-05, "rewards/rejected": 0.00027812132611870766, "step": 15 }, { "epoch": 0.04264108201745619, "grad_norm": 4.076698141198564, "learning_rate": 2.127659574468085e-07, "logits/chosen": -0.5080031156539917, "logits/rejected": -0.6844709515571594, "logps/chosen": -163.26565551757812, "logps/rejected": -144.93130493164062, "loss": 0.6929, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 0.0008511164924129844, "rewards/margins": 0.0010705896420404315, "rewards/rejected": -0.00021947314962744713, "step": 20 }, { "epoch": 0.05330135252182024, "grad_norm": 4.091883356232605, "learning_rate": 2.659574468085106e-07, "logits/chosen": -0.45363473892211914, "logits/rejected": -0.6415150761604309, "logps/chosen": -160.65203857421875, "logps/rejected": -139.57582092285156, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0009880407014861703, "rewards/margins": 0.0012083369074389338, "rewards/rejected": -0.00022029613319318742, "step": 25 }, { "epoch": 0.06396162302618429, "grad_norm": 4.4267622202574675, "learning_rate": 3.1914893617021275e-07, "logits/chosen": -0.5177901983261108, "logits/rejected": -0.6321993470191956, "logps/chosen": -165.01699829101562, "logps/rejected": -151.71261596679688, "loss": 0.6921, "rewards/accuracies": 0.625, "rewards/chosen": 0.0023814309388399124, "rewards/margins": 0.002116392133757472, "rewards/rejected": 0.0002650389797054231, "step": 30 }, { "epoch": 0.07462189353054834, "grad_norm": 4.269424985466007, "learning_rate": 3.7234042553191484e-07, "logits/chosen": -0.4782675802707672, "logits/rejected": -0.7104529738426208, "logps/chosen": -163.6421356201172, "logps/rejected": -143.2295379638672, "loss": 0.6913, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.004739758092910051, "rewards/margins": 0.0038230004720389843, "rewards/rejected": 0.000916757620871067, "step": 35 }, { "epoch": 0.08528216403491239, "grad_norm": 4.2880363073067365, "learning_rate": 4.25531914893617e-07, "logits/chosen": -0.5303796529769897, "logits/rejected": -0.7106837630271912, "logps/chosen": -174.71463012695312, "logps/rejected": -153.29507446289062, "loss": 0.6903, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.008925501257181168, "rewards/margins": 0.006593695841729641, "rewards/rejected": 0.0023318054154515266, "step": 40 }, { "epoch": 0.09594243453927644, "grad_norm": 4.016438849908063, "learning_rate": 4.787234042553192e-07, "logits/chosen": -0.522494375705719, "logits/rejected": -0.7226734757423401, "logps/chosen": -165.866455078125, "logps/rejected": -144.34194946289062, "loss": 0.6886, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.010274471715092659, "rewards/margins": 0.011223495937883854, "rewards/rejected": -0.0009490237571299076, "step": 45 }, { "epoch": 0.10660270504364049, "grad_norm": 4.3216596095930235, "learning_rate": 4.999376538968061e-07, "logits/chosen": -0.5761003494262695, "logits/rejected": -0.7390087842941284, "logps/chosen": -161.60655212402344, "logps/rejected": -144.6966552734375, "loss": 0.6868, "rewards/accuracies": 0.71875, "rewards/chosen": 0.009824760258197784, "rewards/margins": 0.014007952995598316, "rewards/rejected": -0.004183194134384394, "step": 50 }, { "epoch": 0.11726297554800454, "grad_norm": 4.305829979355763, "learning_rate": 4.99556762539107e-07, "logits/chosen": -0.5275800824165344, "logits/rejected": -0.7155976891517639, "logps/chosen": -172.5618133544922, "logps/rejected": -159.7906494140625, "loss": 0.6842, "rewards/accuracies": 0.6875, "rewards/chosen": 0.007245404180139303, "rewards/margins": 0.016996894031763077, "rewards/rejected": -0.009751489385962486, "step": 55 }, { "epoch": 0.12792324605236857, "grad_norm": 3.919812332975093, "learning_rate": 4.988301435819852e-07, "logits/chosen": -0.528161883354187, "logits/rejected": -0.7242938280105591, "logps/chosen": -163.2517547607422, "logps/rejected": -152.65904235839844, "loss": 0.6833, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -4.5745400711894035e-05, "rewards/margins": 0.017660435289144516, "rewards/rejected": -0.01770617999136448, "step": 60 }, { "epoch": 0.13858351655673262, "grad_norm": 4.26787115297138, "learning_rate": 4.977588036590624e-07, "logits/chosen": -0.6125078797340393, "logits/rejected": -0.7909122109413147, "logps/chosen": -157.07858276367188, "logps/rejected": -142.1239776611328, "loss": 0.6787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.011157763190567493, "rewards/margins": 0.029583096504211426, "rewards/rejected": -0.04074086248874664, "step": 65 }, { "epoch": 0.14924378706109667, "grad_norm": 4.32141025222622, "learning_rate": 4.96344226968867e-07, "logits/chosen": -0.6417307257652283, "logits/rejected": -0.8415061235427856, "logps/chosen": -177.39974975585938, "logps/rejected": -156.98171997070312, "loss": 0.6761, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.018069323152303696, "rewards/margins": 0.04366481304168701, "rewards/rejected": -0.061734139919281006, "step": 70 }, { "epoch": 0.15990405756546072, "grad_norm": 4.745633736375277, "learning_rate": 4.945883732186751e-07, "logits/chosen": -0.6420779824256897, "logits/rejected": -0.8456922769546509, "logps/chosen": -175.96359252929688, "logps/rejected": -160.39553833007812, "loss": 0.6753, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.049303699284791946, "rewards/margins": 0.04190283641219139, "rewards/rejected": -0.09120653569698334, "step": 75 }, { "epoch": 0.17056432806982477, "grad_norm": 4.4046157142215705, "learning_rate": 4.924936749095969e-07, "logits/chosen": -0.6506496071815491, "logits/rejected": -0.8331305384635925, "logps/chosen": -170.9277801513672, "logps/rejected": -157.8987579345703, "loss": 0.6764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07082077115774155, "rewards/margins": 0.044193871319293976, "rewards/rejected": -0.11501463502645493, "step": 80 }, { "epoch": 0.18122459857418882, "grad_norm": 5.024858873122934, "learning_rate": 4.900630339666717e-07, "logits/chosen": -0.6046501994132996, "logits/rejected": -0.879498302936554, "logps/chosen": -172.4420928955078, "logps/rejected": -155.1177215576172, "loss": 0.6708, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08710388094186783, "rewards/margins": 0.05091012641787529, "rewards/rejected": -0.13801398873329163, "step": 85 }, { "epoch": 0.19188486907855287, "grad_norm": 4.906760943250142, "learning_rate": 4.872998177186375e-07, "logits/chosen": -0.6804112195968628, "logits/rejected": -0.9185736775398254, "logps/chosen": -173.2130126953125, "logps/rejected": -157.01849365234375, "loss": 0.6656, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.09927495568990707, "rewards/margins": 0.056527040898799896, "rewards/rejected": -0.15580201148986816, "step": 90 }, { "epoch": 0.20254513958291692, "grad_norm": 4.854322224106784, "learning_rate": 4.842078542329463e-07, "logits/chosen": -0.6420129537582397, "logits/rejected": -0.8440741300582886, "logps/chosen": -172.54263305664062, "logps/rejected": -160.012939453125, "loss": 0.6636, "rewards/accuracies": 0.6875, "rewards/chosen": -0.11956344544887543, "rewards/margins": 0.0651877298951149, "rewards/rejected": -0.18475116789340973, "step": 95 }, { "epoch": 0.21320541008728097, "grad_norm": 5.020847639274401, "learning_rate": 4.807914270124876e-07, "logits/chosen": -0.6584053635597229, "logits/rejected": -0.8369486927986145, "logps/chosen": -158.8271484375, "logps/rejected": -151.04791259765625, "loss": 0.6622, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13495273888111115, "rewards/margins": 0.06916390359401703, "rewards/rejected": -0.20411665737628937, "step": 100 }, { "epoch": 0.22386568059164502, "grad_norm": 5.1518931973507875, "learning_rate": 4.770552690613665e-07, "logits/chosen": -0.7008846998214722, "logits/rejected": -0.9158443212509155, "logps/chosen": -181.6995391845703, "logps/rejected": -168.43638610839844, "loss": 0.6531, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.14559721946716309, "rewards/margins": 0.08520212024450302, "rewards/rejected": -0.2307993471622467, "step": 105 }, { "epoch": 0.23452595109600907, "grad_norm": 4.93222468686984, "learning_rate": 4.730045563279577e-07, "logits/chosen": -0.7327751517295837, "logits/rejected": -0.9426084756851196, "logps/chosen": -184.8527069091797, "logps/rejected": -169.2633056640625, "loss": 0.6536, "rewards/accuracies": 0.6875, "rewards/chosen": -0.18423308432102203, "rewards/margins": 0.08043086528778076, "rewards/rejected": -0.2646639347076416, "step": 110 }, { "epoch": 0.24518622160037312, "grad_norm": 5.321285521863998, "learning_rate": 4.6864490053432e-07, "logits/chosen": -0.7645201683044434, "logits/rejected": -0.9136350750923157, "logps/chosen": -184.50399780273438, "logps/rejected": -182.33792114257812, "loss": 0.6467, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.1797805279493332, "rewards/margins": 0.10915856063365936, "rewards/rejected": -0.28893908858299255, "step": 115 }, { "epoch": 0.25584649210473714, "grad_norm": 5.62424898876036, "learning_rate": 4.6398234140190413e-07, "logits/chosen": -0.7312062978744507, "logits/rejected": -0.9342387318611145, "logps/chosen": -189.24227905273438, "logps/rejected": -181.2150115966797, "loss": 0.6404, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.22928175330162048, "rewards/margins": 0.1005432978272438, "rewards/rejected": -0.3298250436782837, "step": 120 }, { "epoch": 0.2665067626091012, "grad_norm": 5.848008736661893, "learning_rate": 4.5902333828432416e-07, "logits/chosen": -0.7402585744857788, "logits/rejected": -0.9469724893569946, "logps/chosen": -188.2518768310547, "logps/rejected": -183.68360900878906, "loss": 0.6314, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2475469410419464, "rewards/margins": 0.15488557517528534, "rewards/rejected": -0.40243250131607056, "step": 125 }, { "epoch": 0.27716703311346524, "grad_norm": 5.62435510068984, "learning_rate": 4.537747612187848e-07, "logits/chosen": -0.6827915906906128, "logits/rejected": -0.9053131341934204, "logps/chosen": -176.27835083007812, "logps/rejected": -177.09768676757812, "loss": 0.6331, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2656404376029968, "rewards/margins": 0.14400802552700043, "rewards/rejected": -0.40964850783348083, "step": 130 }, { "epoch": 0.2878273036178293, "grad_norm": 5.883733263408107, "learning_rate": 4.4824388140856194e-07, "logits/chosen": -0.813726544380188, "logits/rejected": -0.9863494634628296, "logps/chosen": -193.75765991210938, "logps/rejected": -192.6829833984375, "loss": 0.6258, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.32872524857521057, "rewards/margins": 0.16848836839199066, "rewards/rejected": -0.49721360206604004, "step": 135 }, { "epoch": 0.29848757412219334, "grad_norm": 6.222829798884928, "learning_rate": 4.4243836114972003e-07, "logits/chosen": -0.7957421541213989, "logits/rejected": -0.9675641059875488, "logps/chosen": -185.958251953125, "logps/rejected": -190.2810516357422, "loss": 0.6259, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.36352983117103577, "rewards/margins": 0.1679573506116867, "rewards/rejected": -0.5314871072769165, "step": 140 }, { "epoch": 0.3091478446265574, "grad_norm": 6.026406045285321, "learning_rate": 4.3636624321602354e-07, "logits/chosen": -0.7669280171394348, "logits/rejected": -1.0013420581817627, "logps/chosen": -199.62496948242188, "logps/rejected": -198.5312957763672, "loss": 0.6139, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.41982731223106384, "rewards/margins": 0.1919022500514984, "rewards/rejected": -0.611729621887207, "step": 145 }, { "epoch": 0.31980811513092144, "grad_norm": 6.938366915650047, "learning_rate": 4.300359397167469e-07, "logits/chosen": -0.78579181432724, "logits/rejected": -1.0266155004501343, "logps/chosen": -190.5222625732422, "logps/rejected": -191.94302368164062, "loss": 0.6191, "rewards/accuracies": 0.78125, "rewards/chosen": -0.4288663864135742, "rewards/margins": 0.1750030219554901, "rewards/rejected": -0.6038694381713867, "step": 150 }, { "epoch": 0.3304683856352855, "grad_norm": 6.503433628260907, "learning_rate": 4.2345622044281914e-07, "logits/chosen": -0.7738896608352661, "logits/rejected": -0.9923878908157349, "logps/chosen": -201.4437255859375, "logps/rejected": -201.36099243164062, "loss": 0.6073, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.46533137559890747, "rewards/margins": 0.18831129372119904, "rewards/rejected": -0.6536425948143005, "step": 155 }, { "epoch": 0.34112865613964954, "grad_norm": 6.951278659773283, "learning_rate": 4.1663620071744896e-07, "logits/chosen": -0.8082219958305359, "logits/rejected": -1.0701286792755127, "logps/chosen": -221.80789184570312, "logps/rejected": -220.5237274169922, "loss": 0.6108, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5697073340415955, "rewards/margins": 0.196958988904953, "rewards/rejected": -0.7666663527488708, "step": 160 }, { "epoch": 0.35178892664401357, "grad_norm": 7.107245594085975, "learning_rate": 4.0958532876806036e-07, "logits/chosen": -0.9068414568901062, "logits/rejected": -1.0665959119796753, "logps/chosen": -223.1608428955078, "logps/rejected": -228.6382598876953, "loss": 0.6007, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.6051439046859741, "rewards/margins": 0.22736486792564392, "rewards/rejected": -0.8325088620185852, "step": 165 }, { "epoch": 0.36244919714837764, "grad_norm": 7.5558158008023355, "learning_rate": 4.023133726370341e-07, "logits/chosen": -0.7768110036849976, "logits/rejected": -1.023694634437561, "logps/chosen": -230.20028686523438, "logps/rejected": -237.296630859375, "loss": 0.6005, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6818786859512329, "rewards/margins": 0.2647910714149475, "rewards/rejected": -0.9466696977615356, "step": 170 }, { "epoch": 0.37310946765274167, "grad_norm": 7.748401207711855, "learning_rate": 3.9483040664938844e-07, "logits/chosen": -0.8651229739189148, "logits/rejected": -1.1080349683761597, "logps/chosen": -239.4313201904297, "logps/rejected": -245.35641479492188, "loss": 0.5827, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7178173065185547, "rewards/margins": 0.29743796586990356, "rewards/rejected": -1.015255331993103, "step": 175 }, { "epoch": 0.38376973815710574, "grad_norm": 7.833168702083219, "learning_rate": 3.8714679745614556e-07, "logits/chosen": -0.9112879633903503, "logits/rejected": -1.1001932621002197, "logps/chosen": -251.1482391357422, "logps/rejected": -257.7167053222656, "loss": 0.5869, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8083968162536621, "rewards/margins": 0.26524096727371216, "rewards/rejected": -1.073637843132019, "step": 180 }, { "epoch": 0.39443000866146977, "grad_norm": 7.402036456357543, "learning_rate": 3.792731896727196e-07, "logits/chosen": -0.8897370100021362, "logits/rejected": -1.091963768005371, "logps/chosen": -246.6190948486328, "logps/rejected": -268.6842041015625, "loss": 0.5851, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8738805651664734, "rewards/margins": 0.3643074929714203, "rewards/rejected": -1.2381881475448608, "step": 185 }, { "epoch": 0.40509027916583384, "grad_norm": 7.32634230041485, "learning_rate": 3.712204911322228e-07, "logits/chosen": -0.8557780981063843, "logits/rejected": -1.057023286819458, "logps/chosen": -217.1138916015625, "logps/rejected": -232.2842254638672, "loss": 0.5838, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7771707773208618, "rewards/margins": 0.2797245681285858, "rewards/rejected": -1.05689537525177, "step": 190 }, { "epoch": 0.41575054967019787, "grad_norm": 9.45088347010784, "learning_rate": 3.629998577741174e-07, "logits/chosen": -0.8742257952690125, "logits/rejected": -1.0490225553512573, "logps/chosen": -240.11489868164062, "logps/rejected": -265.6509094238281, "loss": 0.5864, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8606696128845215, "rewards/margins": 0.3593491315841675, "rewards/rejected": -1.2200186252593994, "step": 195 }, { "epoch": 0.42641082017456194, "grad_norm": 8.652861206718594, "learning_rate": 3.546226781891501e-07, "logits/chosen": -0.8858518600463867, "logits/rejected": -1.0868691205978394, "logps/chosen": -266.2615051269531, "logps/rejected": -285.27703857421875, "loss": 0.5821, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.975814938545227, "rewards/margins": 0.4038930833339691, "rewards/rejected": -1.3797080516815186, "step": 200 }, { "epoch": 0.43707109067892597, "grad_norm": 9.648919264403354, "learning_rate": 3.461005578419791e-07, "logits/chosen": -0.8321302533149719, "logits/rejected": -1.0552650690078735, "logps/chosen": -253.7904815673828, "logps/rejected": -272.8400573730469, "loss": 0.588, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9785162210464478, "rewards/margins": 0.3188565969467163, "rewards/rejected": -1.297372817993164, "step": 205 }, { "epoch": 0.44773136118329004, "grad_norm": 8.305774901520081, "learning_rate": 3.374453029933509e-07, "logits/chosen": -0.9058141708374023, "logits/rejected": -1.0458682775497437, "logps/chosen": -258.77069091796875, "logps/rejected": -279.82977294921875, "loss": 0.5823, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9745637774467468, "rewards/margins": 0.3414529263973236, "rewards/rejected": -1.3160169124603271, "step": 210 }, { "epoch": 0.45839163168765407, "grad_norm": 8.730250055075079, "learning_rate": 3.286689043441015e-07, "logits/chosen": -0.8889232873916626, "logits/rejected": -1.12659752368927, "logps/chosen": -264.6424255371094, "logps/rejected": -273.76092529296875, "loss": 0.5905, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9881819486618042, "rewards/margins": 0.31245288252830505, "rewards/rejected": -1.3006350994110107, "step": 215 }, { "epoch": 0.46905190219201814, "grad_norm": 9.464259902697126, "learning_rate": 3.197835204236402e-07, "logits/chosen": -0.9472643136978149, "logits/rejected": -1.142138123512268, "logps/chosen": -279.47662353515625, "logps/rejected": -311.5118103027344, "loss": 0.5629, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.133866548538208, "rewards/margins": 0.4763459265232086, "rewards/rejected": -1.6102125644683838, "step": 220 }, { "epoch": 0.47971217269638217, "grad_norm": 9.53110205637003, "learning_rate": 3.1080146074592877e-07, "logits/chosen": -0.8609586954116821, "logits/rejected": -1.1460800170898438, "logps/chosen": -280.66595458984375, "logps/rejected": -307.8553771972656, "loss": 0.5514, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1233617067337036, "rewards/margins": 0.49458152055740356, "rewards/rejected": -1.6179431676864624, "step": 225 }, { "epoch": 0.49037244320074624, "grad_norm": 10.766670968073823, "learning_rate": 3.017351687562928e-07, "logits/chosen": -0.869361400604248, "logits/rejected": -1.071195125579834, "logps/chosen": -287.5640869140625, "logps/rejected": -315.25347900390625, "loss": 0.5665, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.2507811784744263, "rewards/margins": 0.4507381319999695, "rewards/rejected": -1.7015190124511719, "step": 230 }, { "epoch": 0.5010327137051103, "grad_norm": 8.57346401837084, "learning_rate": 2.925972045926878e-07, "logits/chosen": -0.9069381952285767, "logits/rejected": -1.0885123014450073, "logps/chosen": -276.06878662109375, "logps/rejected": -302.81072998046875, "loss": 0.5677, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1936795711517334, "rewards/margins": 0.44402870535850525, "rewards/rejected": -1.6377084255218506, "step": 235 }, { "epoch": 0.5116929842094743, "grad_norm": 8.335769499664682, "learning_rate": 2.83400227685304e-07, "logits/chosen": -0.926740288734436, "logits/rejected": -1.188207983970642, "logps/chosen": -272.0440979003906, "logps/rejected": -291.0050964355469, "loss": 0.5609, "rewards/accuracies": 0.75, "rewards/chosen": -1.1271604299545288, "rewards/margins": 0.37117230892181396, "rewards/rejected": -1.4983327388763428, "step": 240 }, { "epoch": 0.5223532547138383, "grad_norm": 8.95305553011223, "learning_rate": 2.7415697921861525e-07, "logits/chosen": -0.8435291051864624, "logits/rejected": -1.072458028793335, "logps/chosen": -263.8363952636719, "logps/rejected": -289.58270263671875, "loss": 0.552, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0684736967086792, "rewards/margins": 0.43612685799598694, "rewards/rejected": -1.5046006441116333, "step": 245 }, { "epoch": 0.5330135252182024, "grad_norm": 10.305199478555215, "learning_rate": 2.6488026448016686e-07, "logits/chosen": -0.9254539608955383, "logits/rejected": -1.1660327911376953, "logps/chosen": -287.7872009277344, "logps/rejected": -306.3985290527344, "loss": 0.5594, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1574687957763672, "rewards/margins": 0.37755414843559265, "rewards/rejected": -1.5350229740142822, "step": 250 }, { "epoch": 0.5436737957225665, "grad_norm": 9.11035884736237, "learning_rate": 2.5558293512055923e-07, "logits/chosen": -0.8859409093856812, "logits/rejected": -1.1229826211929321, "logps/chosen": -278.84051513671875, "logps/rejected": -311.79669189453125, "loss": 0.5571, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2464487552642822, "rewards/margins": 0.48425453901290894, "rewards/rejected": -1.730703353881836, "step": 255 }, { "epoch": 0.5543340662269305, "grad_norm": 9.443455019352353, "learning_rate": 2.4627787134919946e-07, "logits/chosen": -0.8607537150382996, "logits/rejected": -1.067083716392517, "logps/chosen": -306.5609130859375, "logps/rejected": -340.9252014160156, "loss": 0.559, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4955613613128662, "rewards/margins": 0.5148967504501343, "rewards/rejected": -2.01045823097229, "step": 260 }, { "epoch": 0.5649943367312945, "grad_norm": 10.020105882711649, "learning_rate": 2.369779640904909e-07, "logits/chosen": -0.9872435331344604, "logits/rejected": -1.1790921688079834, "logps/chosen": -301.1463928222656, "logps/rejected": -326.53509521484375, "loss": 0.5522, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.46715247631073, "rewards/margins": 0.45322275161743164, "rewards/rejected": -1.9203754663467407, "step": 265 }, { "epoch": 0.5756546072356586, "grad_norm": 9.230369920285517, "learning_rate": 2.2769609712517602e-07, "logits/chosen": -0.9972273707389832, "logits/rejected": -1.139904499053955, "logps/chosen": -310.1788635253906, "logps/rejected": -328.85455322265625, "loss": 0.5693, "rewards/accuracies": 0.71875, "rewards/chosen": -1.3879780769348145, "rewards/margins": 0.4023415446281433, "rewards/rejected": -1.7903196811676025, "step": 270 }, { "epoch": 0.5863148777400227, "grad_norm": 9.773551123939216, "learning_rate": 2.184451292415778e-07, "logits/chosen": -0.9245126843452454, "logits/rejected": -1.0917091369628906, "logps/chosen": -265.5910949707031, "logps/rejected": -292.25726318359375, "loss": 0.5625, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.123450517654419, "rewards/margins": 0.4249204099178314, "rewards/rejected": -1.5483709573745728, "step": 275 }, { "epoch": 0.5969751482443867, "grad_norm": 9.944866138311095, "learning_rate": 2.0923787642146434e-07, "logits/chosen": -0.8810575604438782, "logits/rejected": -1.0941672325134277, "logps/chosen": -280.61279296875, "logps/rejected": -312.9557800292969, "loss": 0.552, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2670402526855469, "rewards/margins": 0.519837498664856, "rewards/rejected": -1.7868778705596924, "step": 280 }, { "epoch": 0.6076354187487507, "grad_norm": 9.880910925618455, "learning_rate": 2.0008709408521507e-07, "logits/chosen": -0.9383381009101868, "logits/rejected": -1.1827994585037231, "logps/chosen": -295.6000671386719, "logps/rejected": -324.3331604003906, "loss": 0.5407, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2447686195373535, "rewards/margins": 0.5489395260810852, "rewards/rejected": -1.793708086013794, "step": 285 }, { "epoch": 0.6182956892531148, "grad_norm": 10.071491320024812, "learning_rate": 1.9100545942088848e-07, "logits/chosen": -0.9224274754524231, "logits/rejected": -1.1538960933685303, "logps/chosen": -289.017578125, "logps/rejected": -325.94952392578125, "loss": 0.5457, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.2537972927093506, "rewards/margins": 0.5672923922538757, "rewards/rejected": -1.821089744567871, "step": 290 }, { "epoch": 0.6289559597574789, "grad_norm": 11.845857689113707, "learning_rate": 1.8200555382166898e-07, "logits/chosen": -0.9387105107307434, "logits/rejected": -1.1250282526016235, "logps/chosen": -318.4964294433594, "logps/rejected": -338.69696044921875, "loss": 0.5696, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5140787363052368, "rewards/margins": 0.4427851140499115, "rewards/rejected": -1.9568637609481812, "step": 295 }, { "epoch": 0.6396162302618429, "grad_norm": 10.971903527074975, "learning_rate": 1.7309984545602528e-07, "logits/chosen": -0.9286500215530396, "logits/rejected": -1.1137937307357788, "logps/chosen": -279.747802734375, "logps/rejected": -307.8285217285156, "loss": 0.5376, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.323687195777893, "rewards/margins": 0.48056259751319885, "rewards/rejected": -1.8042497634887695, "step": 300 }, { "epoch": 0.6502765007662069, "grad_norm": 10.964118734413244, "learning_rate": 1.6430067199472657e-07, "logits/chosen": -0.9661188125610352, "logits/rejected": -1.1719661951065063, "logps/chosen": -294.7871398925781, "logps/rejected": -329.8990783691406, "loss": 0.5342, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3090574741363525, "rewards/margins": 0.5292733907699585, "rewards/rejected": -1.838330864906311, "step": 305 }, { "epoch": 0.660936771270571, "grad_norm": 11.086382549521785, "learning_rate": 1.5562022351864534e-07, "logits/chosen": -0.9217275381088257, "logits/rejected": -1.1163594722747803, "logps/chosen": -266.56402587890625, "logps/rejected": -306.4192810058594, "loss": 0.5437, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.1430429220199585, "rewards/margins": 0.5940698981285095, "rewards/rejected": -1.7371127605438232, "step": 310 }, { "epoch": 0.6715970417749351, "grad_norm": 10.957109584007643, "learning_rate": 1.4707052563102748e-07, "logits/chosen": -0.8743804097175598, "logits/rejected": -1.0983814001083374, "logps/chosen": -285.22607421875, "logps/rejected": -317.2628173828125, "loss": 0.5298, "rewards/accuracies": 0.75, "rewards/chosen": -1.3059532642364502, "rewards/margins": 0.5242554545402527, "rewards/rejected": -1.8302087783813477, "step": 315 }, { "epoch": 0.6822573122792991, "grad_norm": 10.507330109558843, "learning_rate": 1.386634227976224e-07, "logits/chosen": -0.9597967863082886, "logits/rejected": -1.124963402748108, "logps/chosen": -286.6432189941406, "logps/rejected": -315.79937744140625, "loss": 0.5378, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3352241516113281, "rewards/margins": 0.4382667541503906, "rewards/rejected": -1.7734909057617188, "step": 320 }, { "epoch": 0.6929175827836631, "grad_norm": 9.804790546339078, "learning_rate": 1.3041056193775665e-07, "logits/chosen": -0.888710618019104, "logits/rejected": -1.0851693153381348, "logps/chosen": -311.01544189453125, "logps/rejected": -332.7283020019531, "loss": 0.5475, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.5570933818817139, "rewards/margins": 0.4053064286708832, "rewards/rejected": -1.9623997211456299, "step": 325 }, { "epoch": 0.7035778532880271, "grad_norm": 9.630550808372668, "learning_rate": 1.2232337628908103e-07, "logits/chosen": -0.9582077264785767, "logits/rejected": -1.1537044048309326, "logps/chosen": -326.71221923828125, "logps/rejected": -377.6993713378906, "loss": 0.5435, "rewards/accuracies": 0.71875, "rewards/chosen": -1.4935967922210693, "rewards/margins": 0.7231054902076721, "rewards/rejected": -2.2167022228240967, "step": 330 }, { "epoch": 0.7142381237923913, "grad_norm": 9.172032682717258, "learning_rate": 1.1441306956834504e-07, "logits/chosen": -0.9413734674453735, "logits/rejected": -1.1069329977035522, "logps/chosen": -306.80218505859375, "logps/rejected": -357.0929870605469, "loss": 0.5238, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4035927057266235, "rewards/margins": 0.6626663208007812, "rewards/rejected": -2.0662589073181152, "step": 335 }, { "epoch": 0.7248983942967553, "grad_norm": 10.907598822157487, "learning_rate": 1.0669060045014214e-07, "logits/chosen": -1.0222991704940796, "logits/rejected": -1.228389024734497, "logps/chosen": -316.627197265625, "logps/rejected": -357.66229248046875, "loss": 0.5388, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.4493268728256226, "rewards/margins": 0.5827343463897705, "rewards/rejected": -2.0320611000061035, "step": 340 }, { "epoch": 0.7355586648011193, "grad_norm": 10.97300975462713, "learning_rate": 9.9166667385128e-08, "logits/chosen": -0.963638186454773, "logits/rejected": -1.1757190227508545, "logps/chosen": -304.3102722167969, "logps/rejected": -354.2998962402344, "loss": 0.5432, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4618219137191772, "rewards/margins": 0.7080960273742676, "rewards/rejected": -2.1699178218841553, "step": 345 }, { "epoch": 0.7462189353054833, "grad_norm": 9.89897013382996, "learning_rate": 9.185169377874488e-08, "logits/chosen": -0.9903243780136108, "logits/rejected": -1.1469306945800781, "logps/chosen": -312.1212158203125, "logps/rejected": -346.9307861328125, "loss": 0.5252, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.5106861591339111, "rewards/margins": 0.49892768263816833, "rewards/rejected": -2.0096137523651123, "step": 350 }, { "epoch": 0.7568792058098475, "grad_norm": 10.018680833325265, "learning_rate": 8.475581355098379e-08, "logits/chosen": -0.9698395729064941, "logits/rejected": -1.1572554111480713, "logps/chosen": -304.4853820800781, "logps/rejected": -342.16827392578125, "loss": 0.5462, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.4320096969604492, "rewards/margins": 0.5366055965423584, "rewards/rejected": -1.968615174293518, "step": 355 }, { "epoch": 0.7675394763142115, "grad_norm": 11.03385142626086, "learning_rate": 7.788885709719033e-08, "logits/chosen": -0.9215399622917175, "logits/rejected": -1.1144723892211914, "logps/chosen": -316.9365234375, "logps/rejected": -359.6341857910156, "loss": 0.5392, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.580185890197754, "rewards/margins": 0.564557671546936, "rewards/rejected": -2.1447434425354004, "step": 360 }, { "epoch": 0.7781997468185755, "grad_norm": 9.523737016870674, "learning_rate": 7.126033766936365e-08, "logits/chosen": -0.9409270286560059, "logits/rejected": -1.124208688735962, "logps/chosen": -311.7746276855469, "logps/rejected": -355.46343994140625, "loss": 0.536, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.5002214908599854, "rewards/margins": 0.5499864816665649, "rewards/rejected": -2.05020809173584, "step": 365 }, { "epoch": 0.7888600173229395, "grad_norm": 11.210638577879926, "learning_rate": 6.487943819681488e-08, "logits/chosen": -0.9616110920906067, "logits/rejected": -1.0974061489105225, "logps/chosen": -315.260009765625, "logps/rejected": -357.67059326171875, "loss": 0.5533, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.509570837020874, "rewards/margins": 0.537238597869873, "rewards/rejected": -2.046809434890747, "step": 370 }, { "epoch": 0.7995202878273037, "grad_norm": 9.781063018210089, "learning_rate": 5.875499856444358e-08, "logits/chosen": -0.9564340710639954, "logits/rejected": -1.1133265495300293, "logps/chosen": -314.17535400390625, "logps/rejected": -351.45001220703125, "loss": 0.5458, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.493622064590454, "rewards/margins": 0.5427702069282532, "rewards/rejected": -2.0363922119140625, "step": 375 }, { "epoch": 0.8101805583316677, "grad_norm": 11.983119955061767, "learning_rate": 5.289550336625731e-08, "logits/chosen": -1.0206782817840576, "logits/rejected": -1.2104320526123047, "logps/chosen": -327.4963684082031, "logps/rejected": -353.74603271484375, "loss": 0.5474, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.506259560585022, "rewards/margins": 0.49152374267578125, "rewards/rejected": -1.9977830648422241, "step": 380 }, { "epoch": 0.8208408288360317, "grad_norm": 10.83148544527409, "learning_rate": 4.730907015109759e-08, "logits/chosen": -0.9245961308479309, "logits/rejected": -1.1795787811279297, "logps/chosen": -309.1303405761719, "logps/rejected": -346.46051025390625, "loss": 0.5403, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.5297610759735107, "rewards/margins": 0.5533354878425598, "rewards/rejected": -2.083096742630005, "step": 385 }, { "epoch": 0.8315010993403957, "grad_norm": 9.500539654945461, "learning_rate": 4.200343817685981e-08, "logits/chosen": -0.9566155672073364, "logits/rejected": -1.0963544845581055, "logps/chosen": -313.0601501464844, "logps/rejected": -343.36773681640625, "loss": 0.547, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.5300524234771729, "rewards/margins": 0.4933779835700989, "rewards/rejected": -2.023430347442627, "step": 390 }, { "epoch": 0.8421613698447599, "grad_norm": 9.955855605589283, "learning_rate": 3.698595768878363e-08, "logits/chosen": -0.9913743734359741, "logits/rejected": -1.180884599685669, "logps/chosen": -311.83636474609375, "logps/rejected": -356.932373046875, "loss": 0.5178, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.429694414138794, "rewards/margins": 0.6187530755996704, "rewards/rejected": -2.048447370529175, "step": 395 }, { "epoch": 0.8528216403491239, "grad_norm": 11.149747005186983, "learning_rate": 3.226357973666888e-08, "logits/chosen": -1.0238213539123535, "logits/rejected": -1.1811949014663696, "logps/chosen": -332.1514587402344, "logps/rejected": -359.03167724609375, "loss": 0.5505, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.6280012130737305, "rewards/margins": 0.43937546014785767, "rewards/rejected": -2.0673766136169434, "step": 400 }, { "epoch": 0.8528216403491239, "eval_logits/chosen": -0.9705477356910706, "eval_logits/rejected": -1.165926456451416, "eval_logps/chosen": -307.21051025390625, "eval_logps/rejected": -356.52508544921875, "eval_loss": 0.5049245953559875, "eval_rewards/accuracies": 0.7932573556900024, "eval_rewards/chosen": -1.4455755949020386, "eval_rewards/margins": 0.6763937473297119, "eval_rewards/rejected": -2.12196946144104, "eval_runtime": 11441.6179, "eval_samples_per_second": 5.247, "eval_steps_per_second": 1.312, "step": 400 }, { "epoch": 0.8634819108534879, "grad_norm": 9.468787134199466, "learning_rate": 2.7842846545123505e-08, "logits/chosen": -0.9555789232254028, "logits/rejected": -1.1705703735351562, "logps/chosen": -289.6531677246094, "logps/rejected": -345.7925720214844, "loss": 0.5233, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.3922350406646729, "rewards/margins": 0.6980171203613281, "rewards/rejected": -2.090252161026001, "step": 405 }, { "epoch": 0.8741421813578519, "grad_norm": 10.178761020491258, "learning_rate": 2.372988245018401e-08, "logits/chosen": -0.9851318597793579, "logits/rejected": -1.1668522357940674, "logps/chosen": -316.6786193847656, "logps/rejected": -362.8905944824219, "loss": 0.5423, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.598661184310913, "rewards/margins": 0.608306884765625, "rewards/rejected": -2.206967830657959, "step": 410 }, { "epoch": 0.884802451862216, "grad_norm": 9.329485481095736, "learning_rate": 1.9930385414865386e-08, "logits/chosen": -1.0145405530929565, "logits/rejected": -1.2289698123931885, "logps/chosen": -336.15087890625, "logps/rejected": -373.11309814453125, "loss": 0.5293, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.558721899986267, "rewards/margins": 0.6198412775993347, "rewards/rejected": -2.178563356399536, "step": 415 }, { "epoch": 0.8954627223665801, "grad_norm": 9.690686562397088, "learning_rate": 1.6449619135393084e-08, "logits/chosen": -0.9239746928215027, "logits/rejected": -1.1881077289581299, "logps/chosen": -296.87200927734375, "logps/rejected": -329.9718017578125, "loss": 0.5513, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.472847580909729, "rewards/margins": 0.5113754868507385, "rewards/rejected": -1.9842230081558228, "step": 420 }, { "epoch": 0.9061229928709441, "grad_norm": 10.862769817255897, "learning_rate": 1.329240574905452e-08, "logits/chosen": -0.9023639559745789, "logits/rejected": -1.0890004634857178, "logps/chosen": -324.7179260253906, "logps/rejected": -374.7180480957031, "loss": 0.5149, "rewards/accuracies": 0.78125, "rewards/chosen": -1.5423232316970825, "rewards/margins": 0.6671528816223145, "rewards/rejected": -2.2094759941101074, "step": 425 }, { "epoch": 0.9167832633753081, "grad_norm": 11.35977235393007, "learning_rate": 1.0463119153770989e-08, "logits/chosen": -0.9444347620010376, "logits/rejected": -1.1702197790145874, "logps/chosen": -298.4215393066406, "logps/rejected": -328.64215087890625, "loss": 0.5404, "rewards/accuracies": 0.65625, "rewards/chosen": -1.4311974048614502, "rewards/margins": 0.5026859045028687, "rewards/rejected": -1.9338833093643188, "step": 430 }, { "epoch": 0.9274435338796722, "grad_norm": 10.068213055827782, "learning_rate": 7.965678948645832e-09, "logits/chosen": -0.9912747144699097, "logits/rejected": -1.2084077596664429, "logps/chosen": -336.46929931640625, "logps/rejected": -379.56640625, "loss": 0.538, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6182082891464233, "rewards/margins": 0.6836891174316406, "rewards/rejected": -2.3018975257873535, "step": 435 }, { "epoch": 0.9381038043840363, "grad_norm": 12.790282190393167, "learning_rate": 5.803545003882554e-09, "logits/chosen": -0.9938758015632629, "logits/rejected": -1.17817223072052, "logps/chosen": -326.2915954589844, "logps/rejected": -371.28631591796875, "loss": 0.5377, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.5600776672363281, "rewards/margins": 0.5917671918869019, "rewards/rejected": -2.1518447399139404, "step": 440 }, { "epoch": 0.9487640748884003, "grad_norm": 9.050016131957404, "learning_rate": 3.979712667596669e-09, "logits/chosen": -0.9720270037651062, "logits/rejected": -1.1488044261932373, "logps/chosen": -304.312255859375, "logps/rejected": -351.5962219238281, "loss": 0.5199, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.4655094146728516, "rewards/margins": 0.6790416240692139, "rewards/rejected": -2.1445512771606445, "step": 445 }, { "epoch": 0.9594243453927643, "grad_norm": 13.159010993827899, "learning_rate": 2.4967086161600814e-09, "logits/chosen": -0.994873046875, "logits/rejected": -1.1672512292861938, "logps/chosen": -314.894287109375, "logps/rejected": -354.23223876953125, "loss": 0.5276, "rewards/accuracies": 0.75, "rewards/chosen": -1.5018284320831299, "rewards/margins": 0.5567340850830078, "rewards/rejected": -2.0585622787475586, "step": 450 }, { "epoch": 0.9700846158971284, "grad_norm": 9.906738715572994, "learning_rate": 1.3565873538283757e-09, "logits/chosen": -0.9630732536315918, "logits/rejected": -1.1276707649230957, "logps/chosen": -306.04345703125, "logps/rejected": -351.21099853515625, "loss": 0.5208, "rewards/accuracies": 0.75, "rewards/chosen": -1.395446538925171, "rewards/margins": 0.6138492822647095, "rewards/rejected": -2.009295701980591, "step": 455 }, { "epoch": 0.9807448864014925, "grad_norm": 10.687835024200046, "learning_rate": 5.609283664990693e-10, "logits/chosen": -0.9506285786628723, "logits/rejected": -1.20163094997406, "logps/chosen": -323.80657958984375, "logps/rejected": -370.2672424316406, "loss": 0.5199, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.5296146869659424, "rewards/margins": 0.6610507369041443, "rewards/rejected": -2.1906654834747314, "step": 460 }, { "epoch": 0.9914051569058565, "grad_norm": 11.797447945184583, "learning_rate": 1.1083393354488491e-10, "logits/chosen": -0.9356955289840698, "logits/rejected": -1.1217402219772339, "logps/chosen": -326.0872497558594, "logps/rejected": -382.658203125, "loss": 0.5263, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.588428020477295, "rewards/margins": 0.7401828169822693, "rewards/rejected": -2.328610897064209, "step": 465 }, { "epoch": 0.9999333733093477, "step": 469, "total_flos": 0.0, "train_loss": 0.5891387982409138, "train_runtime": 37343.5856, "train_samples_per_second": 1.608, "train_steps_per_second": 0.013 } ], "logging_steps": 5, "max_steps": 469, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }