{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9765925925925925, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 1313.8543701171875, "learning_rate": 7.8125e-06, "log_odds_chosen": 1.6327810287475586, "log_odds_ratio": -11.146058082580566, "logps/chosen": -21.960407257080078, "logps/rejected": -23.59285545349121, "loss": 881.0415, "nll_loss": 8.637601852416992, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -10.980203628540039, "rewards/margins": 0.8162234425544739, "rewards/rejected": -11.796427726745605, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 958.0737915039062, "learning_rate": 1.5625e-05, "log_odds_chosen": 1.0805047750473022, "log_odds_ratio": -8.258191108703613, "logps/chosen": -19.551382064819336, "logps/rejected": -20.631277084350586, "loss": 757.7502, "nll_loss": 7.813385009765625, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -9.775691032409668, "rewards/margins": 0.5399460792541504, "rewards/rejected": -10.315638542175293, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 1503.8668212890625, "learning_rate": 2.34375e-05, "log_odds_chosen": 4.2659807205200195, "log_odds_ratio": -6.569916725158691, "logps/chosen": -17.423053741455078, "logps/rejected": -21.687484741210938, "loss": 662.6386, "nll_loss": 7.951455116271973, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -8.711526870727539, "rewards/margins": 2.132215976715088, "rewards/rejected": -10.843742370605469, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 5612.318359375, "learning_rate": 3.125e-05, "log_odds_chosen": 0.5963099598884583, "log_odds_ratio": -4.979976177215576, "logps/chosen": -11.640253067016602, "logps/rejected": -12.238527297973633, "loss": 452.0445, "nll_loss": 6.280893802642822, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -5.820126533508301, "rewards/margins": 0.29913684725761414, "rewards/rejected": -6.119263648986816, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 448.8934326171875, "learning_rate": 3.90625e-05, "log_odds_chosen": 0.5738601684570312, "log_odds_ratio": -0.9728918075561523, "logps/chosen": -2.5826029777526855, "logps/rejected": -3.132516860961914, "loss": 97.8864, "nll_loss": 3.112938404083252, "rewards/accuracies": 0.59375, "rewards/chosen": -1.2913014888763428, "rewards/margins": 0.27495700120925903, "rewards/rejected": -1.566258430480957, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 193.44644165039062, "learning_rate": 4.6875e-05, "log_odds_chosen": 0.14733314514160156, "log_odds_ratio": -0.8188334703445435, "logps/chosen": -1.7538366317749023, "logps/rejected": -1.880671739578247, "loss": 68.7921, "nll_loss": 2.9175949096679688, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.8769183158874512, "rewards/margins": 0.0634174793958664, "rewards/rejected": -0.9403358697891235, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 169.11569213867188, "learning_rate": 4.998613757348784e-05, "log_odds_chosen": 0.1972377598285675, "log_odds_ratio": -0.7700116038322449, "logps/chosen": -1.5413159132003784, "logps/rejected": -1.7179752588272095, "loss": 61.1452, "nll_loss": 2.3813323974609375, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.7706579566001892, "rewards/margins": 0.08832962810993195, "rewards/rejected": -0.8589876294136047, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 250.76620483398438, "learning_rate": 4.990147841143462e-05, "log_odds_chosen": 0.23389403522014618, "log_odds_ratio": -0.7099635004997253, "logps/chosen": -1.4198099374771118, "logps/rejected": -1.616281270980835, "loss": 56.518, "nll_loss": 2.3243794441223145, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.7099049687385559, "rewards/margins": 0.09823578596115112, "rewards/rejected": -0.8081406354904175, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 279.9685363769531, "learning_rate": 4.97401218720448e-05, "log_odds_chosen": 0.1506054848432541, "log_odds_ratio": -0.7535517811775208, "logps/chosen": -1.3751205205917358, "logps/rejected": -1.4878621101379395, "loss": 55.5771, "nll_loss": 2.230128049850464, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.6875602602958679, "rewards/margins": 0.056370723992586136, "rewards/rejected": -0.7439310550689697, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 382.7770080566406, "learning_rate": 4.9502564938797946e-05, "log_odds_chosen": 0.19314703345298767, "log_odds_ratio": -0.7266248464584351, "logps/chosen": -1.3842805624008179, "logps/rejected": -1.5405880212783813, "loss": 55.5381, "nll_loss": 2.407309055328369, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6921402812004089, "rewards/margins": 0.07815368473529816, "rewards/rejected": -0.7702940106391907, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 130.2970733642578, "learning_rate": 4.918953929490768e-05, "log_odds_chosen": 0.1594429314136505, "log_odds_ratio": -0.7218093276023865, "logps/chosen": -1.2733328342437744, "logps/rejected": -1.3996423482894897, "loss": 51.8507, "nll_loss": 2.147927761077881, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.6366664171218872, "rewards/margins": 0.06315477192401886, "rewards/rejected": -0.6998211741447449, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 134.74240112304688, "learning_rate": 4.88020090697132e-05, "log_odds_chosen": 0.2484438121318817, "log_odds_ratio": -0.6789853572845459, "logps/chosen": -1.2255313396453857, "logps/rejected": -1.4233750104904175, "loss": 49.8412, "nll_loss": 2.2157835960388184, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.6127656698226929, "rewards/margins": 0.09892191737890244, "rewards/rejected": -0.7116875052452087, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 118.31177520751953, "learning_rate": 4.834116786912897e-05, "log_odds_chosen": 0.2591857612133026, "log_odds_ratio": -0.6711713075637817, "logps/chosen": -1.2236008644104004, "logps/rejected": -1.4167249202728271, "loss": 49.7015, "nll_loss": 2.0784411430358887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.6118004322052002, "rewards/margins": 0.0965619757771492, "rewards/rejected": -0.7083624601364136, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 102.92163848876953, "learning_rate": 4.7808435099299045e-05, "log_odds_chosen": 0.2674064040184021, "log_odds_ratio": -0.675905168056488, "logps/chosen": -1.2073343992233276, "logps/rejected": -1.4260364770889282, "loss": 49.125, "nll_loss": 1.9744670391082764, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.6036671996116638, "rewards/margins": 0.10935105383396149, "rewards/rejected": -0.7130182385444641, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 305.7223815917969, "learning_rate": 4.720545159477922e-05, "log_odds_chosen": 0.28773313760757446, "log_odds_ratio": -0.6539745926856995, "logps/chosen": -1.1219004392623901, "logps/rejected": -1.327695608139038, "loss": 46.259, "nll_loss": 1.9466793537139893, "rewards/accuracies": 0.625, "rewards/chosen": -0.5609502196311951, "rewards/margins": 0.1028975397348404, "rewards/rejected": -0.663847804069519, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 444.95172119140625, "learning_rate": 4.653407456471222e-05, "log_odds_chosen": 0.2160220444202423, "log_odds_ratio": -0.6895222663879395, "logps/chosen": -1.1895593404769897, "logps/rejected": -1.3464289903640747, "loss": 48.854, "nll_loss": 1.8920223712921143, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.5947796702384949, "rewards/margins": 0.07843481004238129, "rewards/rejected": -0.6732144951820374, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 134.1718292236328, "learning_rate": 4.579637187256222e-05, "log_odds_chosen": 0.31953853368759155, "log_odds_ratio": -0.650363564491272, "logps/chosen": -1.1080071926116943, "logps/rejected": -1.349273681640625, "loss": 45.7072, "nll_loss": 1.8154582977294922, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.5540035963058472, "rewards/margins": 0.12063322216272354, "rewards/rejected": -0.6746368408203125, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 288.4284362792969, "learning_rate": 4.499461566702685e-05, "log_odds_chosen": 0.21705381572246552, "log_odds_ratio": -0.6870957016944885, "logps/chosen": -1.1290249824523926, "logps/rejected": -1.2838109731674194, "loss": 46.8203, "nll_loss": 1.9802055358886719, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.5645124912261963, "rewards/margins": 0.07739301770925522, "rewards/rejected": -0.6419054865837097, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 234.74221801757812, "learning_rate": 4.413127538374411e-05, "log_odds_chosen": 0.23379310965538025, "log_odds_ratio": -0.6703908443450928, "logps/chosen": -1.0899484157562256, "logps/rejected": -1.263106346130371, "loss": 45.3973, "nll_loss": 1.8875010013580322, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5449742078781128, "rewards/margins": 0.08657898008823395, "rewards/rejected": -0.6315531730651855, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 112.56975555419922, "learning_rate": 4.320901013934887e-05, "log_odds_chosen": 0.16518335044384003, "log_odds_ratio": -0.7109084129333496, "logps/chosen": -1.1009365320205688, "logps/rejected": -1.238239049911499, "loss": 46.0418, "nll_loss": 1.8534952402114868, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.5504682660102844, "rewards/margins": 0.06865125149488449, "rewards/rejected": -0.6191195249557495, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 136.2017059326172, "learning_rate": 4.223066054130568e-05, "log_odds_chosen": 0.23913511633872986, "log_odds_ratio": -0.6585836410522461, "logps/chosen": -1.0759801864624023, "logps/rejected": -1.2537977695465088, "loss": 44.7828, "nll_loss": 1.795069694519043, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5379900932312012, "rewards/margins": 0.08890879154205322, "rewards/rejected": -0.6268988847732544, "step": 105 }, { "epoch": 1.037925925925926, "grad_norm": 67.5390625, "learning_rate": 4.1199239938743797e-05, "log_odds_chosen": 0.6602018475532532, "log_odds_ratio": -0.5334572196006775, "logps/chosen": -0.9451561570167542, "logps/rejected": -1.404151439666748, "loss": 35.0306, "nll_loss": 1.7614768743515015, "rewards/accuracies": 0.7307692170143127, "rewards/chosen": -0.4725780785083771, "rewards/margins": 0.22949755191802979, "rewards/rejected": -0.702075719833374, "step": 110 }, { "epoch": 1.0853333333333333, "grad_norm": 89.68997192382812, "learning_rate": 4.0117925141242174e-05, "log_odds_chosen": 0.8264390230178833, "log_odds_ratio": -0.46329426765441895, "logps/chosen": -0.9025434255599976, "logps/rejected": -1.4511505365371704, "loss": 37.0855, "nll_loss": 1.7627713680267334, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4512717127799988, "rewards/margins": 0.27430346608161926, "rewards/rejected": -0.7255752682685852, "step": 115 }, { "epoch": 1.1327407407407408, "grad_norm": 85.83277130126953, "learning_rate": 3.899004663415084e-05, "log_odds_chosen": 0.888095498085022, "log_odds_ratio": -0.45123091340065, "logps/chosen": -0.8698997497558594, "logps/rejected": -1.456084966659546, "loss": 35.8665, "nll_loss": 2.0131936073303223, "rewards/accuracies": 0.7906249761581421, "rewards/chosen": -0.4349498748779297, "rewards/margins": 0.2930925786495209, "rewards/rejected": -0.728042483329773, "step": 120 }, { "epoch": 1.1801481481481482, "grad_norm": 75.02178955078125, "learning_rate": 3.781907832058587e-05, "log_odds_chosen": 0.8515494465827942, "log_odds_ratio": -0.46829432249069214, "logps/chosen": -0.8703139424324036, "logps/rejected": -1.4456019401550293, "loss": 36.0252, "nll_loss": 1.8564857244491577, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4351569712162018, "rewards/margins": 0.28764399886131287, "rewards/rejected": -0.7228009700775146, "step": 125 }, { "epoch": 1.2275555555555555, "grad_norm": 93.19149017333984, "learning_rate": 3.660862682169282e-05, "log_odds_chosen": 0.7404316663742065, "log_odds_ratio": -0.49976396560668945, "logps/chosen": -0.8461529016494751, "logps/rejected": -1.305490255355835, "loss": 35.7775, "nll_loss": 1.7167637348175049, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.42307645082473755, "rewards/margins": 0.22966866195201874, "rewards/rejected": -0.6527451276779175, "step": 130 }, { "epoch": 1.274962962962963, "grad_norm": 79.68480682373047, "learning_rate": 3.5362420368134356e-05, "log_odds_chosen": 0.8560611009597778, "log_odds_ratio": -0.4482923150062561, "logps/chosen": -0.8573166728019714, "logps/rejected": -1.4259978532791138, "loss": 35.4271, "nll_loss": 1.7739051580429077, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.4286583364009857, "rewards/margins": 0.2843405604362488, "rewards/rejected": -0.7129989266395569, "step": 135 }, { "epoch": 1.3223703703703704, "grad_norm": 97.05864715576172, "learning_rate": 3.408429731701635e-05, "log_odds_chosen": 0.7707556486129761, "log_odds_ratio": -0.5003089308738708, "logps/chosen": -0.8969907760620117, "logps/rejected": -1.404831886291504, "loss": 37.3019, "nll_loss": 1.8181276321411133, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.44849538803100586, "rewards/margins": 0.2539205849170685, "rewards/rejected": -0.702415943145752, "step": 140 }, { "epoch": 1.3697777777777778, "grad_norm": 94.64373016357422, "learning_rate": 3.2778194329621104e-05, "log_odds_chosen": 0.9273589253425598, "log_odds_ratio": -0.4503125548362732, "logps/chosen": -0.8693191409111023, "logps/rejected": -1.50619375705719, "loss": 35.7555, "nll_loss": 1.7783292531967163, "rewards/accuracies": 0.7906249761581421, "rewards/chosen": -0.43465957045555115, "rewards/margins": 0.31843727827072144, "rewards/rejected": -0.753096878528595, "step": 145 }, { "epoch": 1.417185185185185, "grad_norm": 112.79119110107422, "learning_rate": 3.144813424636031e-05, "log_odds_chosen": 0.733902096748352, "log_odds_ratio": -0.49021005630493164, "logps/chosen": -0.8194792866706848, "logps/rejected": -1.2824211120605469, "loss": 34.8116, "nll_loss": 1.736271619796753, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4097396433353424, "rewards/margins": 0.23147086799144745, "rewards/rejected": -0.6412105560302734, "step": 150 }, { "epoch": 1.4645925925925927, "grad_norm": 120.8626480102539, "learning_rate": 3.0098213696293542e-05, "log_odds_chosen": 0.8592801094055176, "log_odds_ratio": -0.46693143248558044, "logps/chosen": -0.8628988265991211, "logps/rejected": -1.428763508796692, "loss": 35.8416, "nll_loss": 1.7393659353256226, "rewards/accuracies": 0.78125, "rewards/chosen": -0.43144941329956055, "rewards/margins": 0.2829323410987854, "rewards/rejected": -0.714381754398346, "step": 155 }, { "epoch": 1.512, "grad_norm": 128.55426025390625, "learning_rate": 2.8732590479375165e-05, "log_odds_chosen": 0.7666479349136353, "log_odds_ratio": -0.48797711730003357, "logps/chosen": -0.8693684339523315, "logps/rejected": -1.35294771194458, "loss": 36.3933, "nll_loss": 1.7232725620269775, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.43468421697616577, "rewards/margins": 0.2417895793914795, "rewards/rejected": -0.67647385597229, "step": 160 }, { "epoch": 1.5594074074074074, "grad_norm": 123.65845489501953, "learning_rate": 2.7355470760292956e-05, "log_odds_chosen": 0.9002155065536499, "log_odds_ratio": -0.4618147909641266, "logps/chosen": -0.85200035572052, "logps/rejected": -1.435723066329956, "loss": 35.4562, "nll_loss": 1.7452001571655273, "rewards/accuracies": 0.7906249761581421, "rewards/chosen": -0.42600017786026, "rewards/margins": 0.291861355304718, "rewards/rejected": -0.717861533164978, "step": 165 }, { "epoch": 1.6068148148148147, "grad_norm": 77.17584228515625, "learning_rate": 2.597109611334169e-05, "log_odds_chosen": 0.8772485852241516, "log_odds_ratio": -0.4688163697719574, "logps/chosen": -0.8404110074043274, "logps/rejected": -1.4133893251419067, "loss": 35.1622, "nll_loss": 1.7097526788711548, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4202055037021637, "rewards/margins": 0.28648921847343445, "rewards/rejected": -0.7066946625709534, "step": 170 }, { "epoch": 1.6542222222222223, "grad_norm": 91.94951629638672, "learning_rate": 2.458373045823404e-05, "log_odds_chosen": 0.859915554523468, "log_odds_ratio": -0.4548751711845398, "logps/chosen": -0.8396957516670227, "logps/rejected": -1.4033467769622803, "loss": 34.974, "nll_loss": 1.7587263584136963, "rewards/accuracies": 0.7906249761581421, "rewards/chosen": -0.41984787583351135, "rewards/margins": 0.2818255126476288, "rewards/rejected": -0.7016733884811401, "step": 175 }, { "epoch": 1.7016296296296296, "grad_norm": 75.54816436767578, "learning_rate": 2.3197646927086697e-05, "log_odds_chosen": 0.7710874080657959, "log_odds_ratio": -0.4820574223995209, "logps/chosen": -0.8513079881668091, "logps/rejected": -1.341399073600769, "loss": 35.7398, "nll_loss": 1.7565553188323975, "rewards/accuracies": 0.78125, "rewards/chosen": -0.42565399408340454, "rewards/margins": 0.24504557251930237, "rewards/rejected": -0.6706995368003845, "step": 180 }, { "epoch": 1.749037037037037, "grad_norm": 60.633155822753906, "learning_rate": 2.1817114703032176e-05, "log_odds_chosen": 0.9204598665237427, "log_odds_ratio": -0.4521242678165436, "logps/chosen": -0.8442584276199341, "logps/rejected": -1.4724090099334717, "loss": 35.0333, "nll_loss": 1.7535591125488281, "rewards/accuracies": 0.796875, "rewards/chosen": -0.42212921380996704, "rewards/margins": 0.3140752613544464, "rewards/rejected": -0.7362045049667358, "step": 185 }, { "epoch": 1.7964444444444445, "grad_norm": 58.7163200378418, "learning_rate": 2.0446385870993467e-05, "log_odds_chosen": 0.6730726361274719, "log_odds_ratio": -0.5413838624954224, "logps/chosen": -0.9189823865890503, "logps/rejected": -1.3468341827392578, "loss": 38.5132, "nll_loss": 1.7065455913543701, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.45949119329452515, "rewards/margins": 0.21392583847045898, "rewards/rejected": -0.6734170913696289, "step": 190 }, { "epoch": 1.8438518518518519, "grad_norm": 60.383541107177734, "learning_rate": 1.9089682321121834e-05, "log_odds_chosen": 0.9475343823432922, "log_odds_ratio": -0.4484768509864807, "logps/chosen": -0.8385717272758484, "logps/rejected": -1.4635182619094849, "loss": 34.8245, "nll_loss": 1.713822603225708, "rewards/accuracies": 0.784375011920929, "rewards/chosen": -0.4192858636379242, "rewards/margins": 0.31247326731681824, "rewards/rejected": -0.7317591309547424, "step": 195 }, { "epoch": 1.8912592592592592, "grad_norm": 66.11405944824219, "learning_rate": 1.775118274523545e-05, "log_odds_chosen": 0.8329303860664368, "log_odds_ratio": -0.49859505891799927, "logps/chosen": -0.893582820892334, "logps/rejected": -1.451395034790039, "loss": 37.0308, "nll_loss": 1.732862114906311, "rewards/accuracies": 0.784375011920929, "rewards/chosen": -0.446791410446167, "rewards/margins": 0.27890610694885254, "rewards/rejected": -0.7256975173950195, "step": 200 }, { "epoch": 1.9386666666666668, "grad_norm": 54.821868896484375, "learning_rate": 1.643500976631037e-05, "log_odds_chosen": 0.6610826849937439, "log_odds_ratio": -0.5171926617622375, "logps/chosen": -0.8425942659378052, "logps/rejected": -1.258576512336731, "loss": 35.923, "nll_loss": 1.6366369724273682, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.4212971329689026, "rewards/margins": 0.20799115300178528, "rewards/rejected": -0.6292882561683655, "step": 205 }, { "epoch": 1.986074074074074, "grad_norm": 410.4480895996094, "learning_rate": 1.514521724066537e-05, "log_odds_chosen": 0.773653507232666, "log_odds_ratio": -0.4857940673828125, "logps/chosen": -0.8226664662361145, "logps/rejected": -1.3210365772247314, "loss": 34.8322, "nll_loss": 1.650650978088379, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.41133323311805725, "rewards/margins": 0.24918513000011444, "rewards/rejected": -0.6605182886123657, "step": 210 }, { "epoch": 2.0284444444444443, "grad_norm": 81.6287841796875, "learning_rate": 1.3885777771950348e-05, "log_odds_chosen": 1.237162470817566, "log_odds_ratio": -0.37088167667388916, "logps/chosen": -0.7396840453147888, "logps/rejected": -1.494255542755127, "loss": 27.6202, "nll_loss": 1.6762900352478027, "rewards/accuracies": 0.8531468510627747, "rewards/chosen": -0.3698420226573944, "rewards/margins": 0.3772856593132019, "rewards/rejected": -0.7471277713775635, "step": 215 }, { "epoch": 2.075851851851852, "grad_norm": 63.87202453613281, "learning_rate": 1.2660570475395683e-05, "log_odds_chosen": 1.8257486820220947, "log_odds_ratio": -0.26709312200546265, "logps/chosen": -0.6293801665306091, "logps/rejected": -1.7724393606185913, "loss": 25.9158, "nll_loss": 1.6067278385162354, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.31469008326530457, "rewards/margins": 0.5715296268463135, "rewards/rejected": -0.8862196803092957, "step": 220 }, { "epoch": 2.1232592592592594, "grad_norm": 63.67515563964844, "learning_rate": 1.1473369030008974e-05, "log_odds_chosen": 1.9475319385528564, "log_odds_ratio": -0.22811241447925568, "logps/chosen": -0.6124777793884277, "logps/rejected": -1.8340566158294678, "loss": 24.8892, "nll_loss": 1.6936416625976562, "rewards/accuracies": 0.953125, "rewards/chosen": -0.30623888969421387, "rewards/margins": 0.61078941822052, "rewards/rejected": -0.9170283079147339, "step": 225 }, { "epoch": 2.1706666666666665, "grad_norm": 73.4637222290039, "learning_rate": 1.0327830055518842e-05, "log_odds_chosen": 1.9068591594696045, "log_odds_ratio": -0.23920920491218567, "logps/chosen": -0.6302188634872437, "logps/rejected": -1.859368085861206, "loss": 25.4943, "nll_loss": 1.6373430490493774, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -0.3151094317436218, "rewards/margins": 0.6145747900009155, "rewards/rejected": -0.929684042930603, "step": 230 }, { "epoch": 2.218074074074074, "grad_norm": 94.13182067871094, "learning_rate": 9.227481849865235e-06, "log_odds_chosen": 1.905515432357788, "log_odds_ratio": -0.2525004744529724, "logps/chosen": -0.6512196063995361, "logps/rejected": -1.868032455444336, "loss": 26.2984, "nll_loss": 1.6863908767700195, "rewards/accuracies": 0.921875, "rewards/chosen": -0.32560980319976807, "rewards/margins": 0.6084063649177551, "rewards/rejected": -0.934016227722168, "step": 235 }, { "epoch": 2.2654814814814817, "grad_norm": 61.78620147705078, "learning_rate": 8.175713521924978e-06, "log_odds_chosen": 1.8288015127182007, "log_odds_ratio": -0.2607673108577728, "logps/chosen": -0.6412376165390015, "logps/rejected": -1.772962212562561, "loss": 26.2439, "nll_loss": 1.679369568824768, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.32061880826950073, "rewards/margins": 0.565862238407135, "rewards/rejected": -0.8864811062812805, "step": 240 }, { "epoch": 2.3128888888888888, "grad_norm": 72.63899993896484, "learning_rate": 7.1757645529443665e-06, "log_odds_chosen": 1.9564971923828125, "log_odds_ratio": -0.222591370344162, "logps/chosen": -0.6008509397506714, "logps/rejected": -1.823952078819275, "loss": 24.3832, "nll_loss": 1.634280800819397, "rewards/accuracies": 0.953125, "rewards/chosen": -0.3004254698753357, "rewards/margins": 0.6115506291389465, "rewards/rejected": -0.9119760394096375, "step": 245 }, { "epoch": 2.3602962962962963, "grad_norm": 59.229347229003906, "learning_rate": 6.230714818829733e-06, "log_odds_chosen": 2.0835893154144287, "log_odds_ratio": -0.2164476215839386, "logps/chosen": -0.6128490567207336, "logps/rejected": -1.9331867694854736, "loss": 24.5665, "nll_loss": 1.6675183773040771, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.3064245283603668, "rewards/margins": 0.6601688861846924, "rewards/rejected": -0.9665933847427368, "step": 250 }, { "epoch": 2.407703703703704, "grad_norm": 62.527137756347656, "learning_rate": 5.343475104027743e-06, "log_odds_chosen": 2.1743245124816895, "log_odds_ratio": -0.21075662970542908, "logps/chosen": -0.5725008249282837, "logps/rejected": -1.979318380355835, "loss": 23.177, "nll_loss": 1.6942886114120483, "rewards/accuracies": 0.9593750238418579, "rewards/chosen": -0.28625041246414185, "rewards/margins": 0.7034087777137756, "rewards/rejected": -0.9896591901779175, "step": 255 }, { "epoch": 2.455111111111111, "grad_norm": 83.86973571777344, "learning_rate": 4.516778136213037e-06, "log_odds_chosen": 2.1156704425811768, "log_odds_ratio": -0.22012558579444885, "logps/chosen": -0.6010316610336304, "logps/rejected": -1.9568220376968384, "loss": 24.2239, "nll_loss": 1.6713542938232422, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3005158305168152, "rewards/margins": 0.677895188331604, "rewards/rejected": -0.9784110188484192, "step": 260 }, { "epoch": 2.5025185185185186, "grad_norm": 78.56597900390625, "learning_rate": 3.7531701693965554e-06, "log_odds_chosen": 1.9914929866790771, "log_odds_ratio": -0.2501711845397949, "logps/chosen": -0.6486467123031616, "logps/rejected": -1.941457748413086, "loss": 26.055, "nll_loss": 1.6230090856552124, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.3243233561515808, "rewards/margins": 0.6464055776596069, "rewards/rejected": -0.970728874206543, "step": 265 }, { "epoch": 2.549925925925926, "grad_norm": 63.49893569946289, "learning_rate": 3.055003141378948e-06, "log_odds_chosen": 2.094613552093506, "log_odds_ratio": -0.2102334052324295, "logps/chosen": -0.5994306802749634, "logps/rejected": -1.9197509288787842, "loss": 24.16, "nll_loss": 1.6677443981170654, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.2997153401374817, "rewards/margins": 0.6601601839065552, "rewards/rejected": -0.9598754644393921, "step": 270 }, { "epoch": 2.5973333333333333, "grad_norm": 79.37230682373047, "learning_rate": 2.424427429704365e-06, "log_odds_chosen": 2.1252670288085938, "log_odds_ratio": -0.2258034646511078, "logps/chosen": -0.638454794883728, "logps/rejected": -2.0011186599731445, "loss": 25.4827, "nll_loss": 1.6783549785614014, "rewards/accuracies": 0.9375, "rewards/chosen": -0.319227397441864, "rewards/margins": 0.6813319325447083, "rewards/rejected": -1.0005593299865723, "step": 275 }, { "epoch": 2.644740740740741, "grad_norm": 66.57933807373047, "learning_rate": 1.8633852284264508e-06, "log_odds_chosen": 2.1056222915649414, "log_odds_ratio": -0.2241026908159256, "logps/chosen": -0.5848366022109985, "logps/rejected": -1.9145119190216064, "loss": 23.795, "nll_loss": 1.665006399154663, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.29241830110549927, "rewards/margins": 0.664837658405304, "rewards/rejected": -0.9572559595108032, "step": 280 }, { "epoch": 2.6921481481481484, "grad_norm": 72.5078353881836, "learning_rate": 1.3736045660864034e-06, "log_odds_chosen": 2.1603641510009766, "log_odds_ratio": -0.212470144033432, "logps/chosen": -0.6109951138496399, "logps/rejected": -2.0010976791381836, "loss": 24.4558, "nll_loss": 1.7145074605941772, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.30549755692481995, "rewards/margins": 0.695051372051239, "rewards/rejected": -1.0005488395690918, "step": 285 }, { "epoch": 2.7395555555555555, "grad_norm": 66.63565063476562, "learning_rate": 9.565939833279192e-07, "log_odds_chosen": 2.2116315364837646, "log_odds_ratio": -0.216557115316391, "logps/chosen": -0.6101894378662109, "logps/rejected": -2.0354068279266357, "loss": 24.4504, "nll_loss": 1.7398754358291626, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -0.30509471893310547, "rewards/margins": 0.7126085758209229, "rewards/rejected": -1.0177034139633179, "step": 290 }, { "epoch": 2.786962962962963, "grad_norm": 72.43724060058594, "learning_rate": 6.136378865420872e-07, "log_odds_chosen": 2.0436155796051025, "log_odds_ratio": -0.25939661264419556, "logps/chosen": -0.6374012231826782, "logps/rejected": -1.9478752613067627, "loss": 25.9435, "nll_loss": 1.6763683557510376, "rewards/accuracies": 0.934374988079071, "rewards/chosen": -0.3187006115913391, "rewards/margins": 0.6552368998527527, "rewards/rejected": -0.9739376306533813, "step": 295 }, { "epoch": 2.83437037037037, "grad_norm": 63.837345123291016, "learning_rate": 3.45792591853214e-07, "log_odds_chosen": 2.2019195556640625, "log_odds_ratio": -0.22682932019233704, "logps/chosen": -0.6202256679534912, "logps/rejected": -2.0658164024353027, "loss": 24.8148, "nll_loss": 1.7285759449005127, "rewards/accuracies": 0.934374988079071, "rewards/chosen": -0.3101128339767456, "rewards/margins": 0.7227953672409058, "rewards/rejected": -1.0329082012176514, "step": 300 }, { "epoch": 2.8817777777777778, "grad_norm": 70.43053436279297, "learning_rate": 1.538830716302092e-07, "log_odds_chosen": 2.2171826362609863, "log_odds_ratio": -0.2098480463027954, "logps/chosen": -0.5961582064628601, "logps/rejected": -2.024376392364502, "loss": 23.9484, "nll_loss": 1.663637399673462, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -0.29807910323143005, "rewards/margins": 0.7141090631484985, "rewards/rejected": -1.012188196182251, "step": 305 }, { "epoch": 2.9291851851851853, "grad_norm": 95.00191497802734, "learning_rate": 3.8500413544415025e-08, "log_odds_chosen": 2.249077081680298, "log_odds_ratio": -0.1899929940700531, "logps/chosen": -0.6121601462364197, "logps/rejected": -2.043703317642212, "loss": 24.1381, "nll_loss": 1.6788402795791626, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.30608007311820984, "rewards/margins": 0.7157715559005737, "rewards/rejected": -1.021851658821106, "step": 310 }, { "epoch": 2.9765925925925925, "grad_norm": 54.38016891479492, "learning_rate": 0.0, "log_odds_chosen": 2.183474063873291, "log_odds_ratio": -0.20934459567070007, "logps/chosen": -0.6058934926986694, "logps/rejected": -2.00223445892334, "loss": 24.3016, "nll_loss": 1.6638948917388916, "rewards/accuracies": 0.971875011920929, "rewards/chosen": -0.3029467463493347, "rewards/margins": 0.69817054271698, "rewards/rejected": -1.00111722946167, "step": 315 }, { "epoch": 2.9765925925925925, "step": 315, "total_flos": 0.0, "train_loss": 78.5789802187965, "train_runtime": 9244.4601, "train_samples_per_second": 2.191, "train_steps_per_second": 0.034 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }